1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/CodeGen/CallingConvLower.h"
16#include "llvm/CodeGen/MachineFrameInfo.h"
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/MachineJumpTableInfo.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/SelectionDAG.h"
21#include "llvm/CodeGen/TargetRegisterInfo.h"
22#include "llvm/CodeGen/TargetSubtargetInfo.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/DerivedTypes.h"
25#include "llvm/IR/GlobalVariable.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCExpr.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/KnownBits.h"
31#include "llvm/Support/MathExtras.h"
32#include "llvm/Target/TargetLoweringObjectFile.h"
33#include "llvm/Target/TargetMachine.h"
34#include <cctype>
35using namespace llvm;
36
37/// NOTE: The TargetMachine owns TLOF.
38TargetLowering::TargetLowering(const TargetMachine &tm)
39 : TargetLoweringBase(tm) {}
40
41const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
42 return nullptr;
43}
44
45bool TargetLowering::isPositionIndependent() const {
46 return getTargetMachine().isPositionIndependent();
47}
48
49/// Check whether a given call node is in tail position within its function. If
50/// so, it sets Chain to the input chain of the tail call.
51bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
52 SDValue &Chain) const {
53 const Function &F = DAG.getMachineFunction().getFunction();
54
55 // First, check if tail calls have been disabled in this function.
56 if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
57 return false;
58
59 // Conservatively require the attributes of the call to match those of
60 // the return. Ignore NoAlias and NonNull because they don't affect the
61 // call sequence.
62 AttributeList CallerAttrs = F.getAttributes();
63 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
64 .removeAttribute(Attribute::NoAlias)
65 .removeAttribute(Attribute::NonNull)
66 .hasAttributes())
67 return false;
68
69 // It's not safe to eliminate the sign / zero extension of the return value.
70 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
71 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
72 return false;
73
74 // Check if the only use is a function return node.
75 return isUsedByReturnOnly(Node, Chain);
76}
77
78bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
79 const uint32_t *CallerPreservedMask,
80 const SmallVectorImpl<CCValAssign> &ArgLocs,
81 const SmallVectorImpl<SDValue> &OutVals) const {
82 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
83 const CCValAssign &ArgLoc = ArgLocs[I];
84 if (!ArgLoc.isRegLoc())
85 continue;
86 MCRegister Reg = ArgLoc.getLocReg();
87 // Only look at callee saved registers.
88 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
89 continue;
90 // Check that we pass the value used for the caller.
91 // (We look for a CopyFromReg reading a virtual register that is used
92 // for the function live-in value of register Reg)
93 SDValue Value = OutVals[I];
94 if (Value->getOpcode() != ISD::CopyFromReg)
95 return false;
96 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
97 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
98 return false;
99 }
100 return true;
101}
102
103/// Set CallLoweringInfo attribute flags based on a call instruction
104/// and called function attributes.
105void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
106 unsigned ArgIdx) {
107 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
108 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
109 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
110 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
111 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
112 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
113 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
114 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
115 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
116 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
117 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
118 Alignment = Call->getParamAlign(ArgIdx);
119 ByValType = nullptr;
120 if (IsByVal)
121 ByValType = Call->getParamByValType(ArgIdx);
122 PreallocatedType = nullptr;
123 if (IsPreallocated)
124 PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
125}
126
127/// Generate a libcall taking the given operands as arguments and returning a
128/// result of type RetVT.
129std::pair<SDValue, SDValue>
130TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
131 ArrayRef<SDValue> Ops,
132 MakeLibCallOptions CallOptions,
133 const SDLoc &dl,
134 SDValue InChain) const {
135 if (!InChain)
136 InChain = DAG.getEntryNode();
137
138 TargetLowering::ArgListTy Args;
139 Args.reserve(Ops.size());
140
141 TargetLowering::ArgListEntry Entry;
142 for (unsigned i = 0; i < Ops.size(); ++i) {
143 SDValue NewOp = Ops[i];
144 Entry.Node = NewOp;
145 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
146 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
147 CallOptions.IsSExt);
148 Entry.IsZExt = !Entry.IsSExt;
149
150 if (CallOptions.IsSoften &&
151 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
152 Entry.IsSExt = Entry.IsZExt = false;
153 }
154 Args.push_back(Entry);
155 }
156
157 if (LC == RTLIB::UNKNOWN_LIBCALL)
158 report_fatal_error("Unsupported library call operation!");
159 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
160 getPointerTy(DAG.getDataLayout()));
161
162 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
163 TargetLowering::CallLoweringInfo CLI(DAG);
164 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
165 bool zeroExtend = !signExtend;
166
167 if (CallOptions.IsSoften &&
168 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
169 signExtend = zeroExtend = false;
170 }
171
172 CLI.setDebugLoc(dl)
173 .setChain(InChain)
174 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
175 .setNoReturn(CallOptions.DoesNotReturn)
176 .setDiscardResult(!CallOptions.IsReturnValueUsed)
177 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
178 .setSExtResult(signExtend)
179 .setZExtResult(zeroExtend);
180 return LowerCallTo(CLI);
181}
182
183bool TargetLowering::findOptimalMemOpLowering(
184 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
185 unsigned SrcAS, const AttributeList &FuncAttributes) const {
186 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
187 return false;
188
189 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
190
191 if (VT == MVT::Other) {
192 // Use the largest integer type whose alignment constraints are satisfied.
193 // We only need to check DstAlign here as SrcAlign is always greater or
194 // equal to DstAlign (or zero).
195 VT = MVT::i64;
196 if (Op.isFixedDstAlign())
197 while (
198 Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
199 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
200 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
201 assert(VT.isInteger());
202
203 // Find the largest legal integer type.
204 MVT LVT = MVT::i64;
205 while (!isTypeLegal(LVT))
206 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
207 assert(LVT.isInteger());
208
209 // If the type we've chosen is larger than the largest legal integer type
210 // then use that instead.
211 if (VT.bitsGT(LVT))
212 VT = LVT;
213 }
214
215 unsigned NumMemOps = 0;
216 uint64_t Size = Op.size();
217 while (Size) {
218 unsigned VTSize = VT.getSizeInBits() / 8;
219 while (VTSize > Size) {
220 // For now, only use non-vector load / store's for the left-over pieces.
221 EVT NewVT = VT;
222 unsigned NewVTSize;
223
224 bool Found = false;
225 if (VT.isVector() || VT.isFloatingPoint()) {
226 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
227 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
228 isSafeMemOpType(NewVT.getSimpleVT()))
229 Found = true;
230 else if (NewVT == MVT::i64 &&
231 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
232 isSafeMemOpType(MVT::f64)) {
233 // i64 is usually not legal on 32-bit targets, but f64 may be.
234 NewVT = MVT::f64;
235 Found = true;
236 }
237 }
238
239 if (!Found) {
240 do {
241 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
242 if (NewVT == MVT::i8)
243 break;
244 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
245 }
246 NewVTSize = NewVT.getSizeInBits() / 8;
247
248 // If the new VT cannot cover all of the remaining bits, then consider
249 // issuing a (or a pair of) unaligned and overlapping load / store.
250 bool Fast;
251 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
252 allowsMisalignedMemoryAccesses(
253 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1,
254 MachineMemOperand::MONone, &Fast) &&
255 Fast)
256 VTSize = Size;
257 else {
258 VT = NewVT;
259 VTSize = NewVTSize;
260 }
261 }
262
263 if (++NumMemOps > Limit)
264 return false;
265
266 MemOps.push_back(VT);
267 Size -= VTSize;
268 }
269
270 return true;
271}
272
273/// Soften the operands of a comparison. This code is shared among BR_CC,
274/// SELECT_CC, and SETCC handlers.
275void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
276 SDValue &NewLHS, SDValue &NewRHS,
277 ISD::CondCode &CCCode,
278 const SDLoc &dl, const SDValue OldLHS,
279 const SDValue OldRHS) const {
280 SDValue Chain;
281 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
282 OldRHS, Chain);
283}
284
285void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
286 SDValue &NewLHS, SDValue &NewRHS,
287 ISD::CondCode &CCCode,
288 const SDLoc &dl, const SDValue OldLHS,
289 const SDValue OldRHS,
290 SDValue &Chain,
291 bool IsSignaling) const {
292 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
293 // not supporting it. We can update this code when libgcc provides such
294 // functions.
295
296 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
297 && "Unsupported setcc type!");
298
299 // Expand into one or more soft-fp libcall(s).
300 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
301 bool ShouldInvertCC = false;
302 switch (CCCode) {
303 case ISD::SETEQ:
304 case ISD::SETOEQ:
305 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
306 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
307 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
308 break;
309 case ISD::SETNE:
310 case ISD::SETUNE:
311 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
312 (VT == MVT::f64) ? RTLIB::UNE_F64 :
313 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
314 break;
315 case ISD::SETGE:
316 case ISD::SETOGE:
317 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
318 (VT == MVT::f64) ? RTLIB::OGE_F64 :
319 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
320 break;
321 case ISD::SETLT:
322 case ISD::SETOLT:
323 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
324 (VT == MVT::f64) ? RTLIB::OLT_F64 :
325 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
326 break;
327 case ISD::SETLE:
328 case ISD::SETOLE:
329 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
330 (VT == MVT::f64) ? RTLIB::OLE_F64 :
331 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
332 break;
333 case ISD::SETGT:
334 case ISD::SETOGT:
335 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
336 (VT == MVT::f64) ? RTLIB::OGT_F64 :
337 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
338 break;
339 case ISD::SETO:
340 ShouldInvertCC = true;
341 LLVM_FALLTHROUGH;
342 case ISD::SETUO:
343 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
344 (VT == MVT::f64) ? RTLIB::UO_F64 :
345 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
346 break;
347 case ISD::SETONE:
348 // SETONE = O && UNE
349 ShouldInvertCC = true;
350 LLVM_FALLTHROUGH;
351 case ISD::SETUEQ:
352 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
353 (VT == MVT::f64) ? RTLIB::UO_F64 :
354 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
355 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
356 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
357 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
358 break;
359 default:
360 // Invert CC for unordered comparisons
361 ShouldInvertCC = true;
362 switch (CCCode) {
363 case ISD::SETULT:
364 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
365 (VT == MVT::f64) ? RTLIB::OGE_F64 :
366 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
367 break;
368 case ISD::SETULE:
369 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
370 (VT == MVT::f64) ? RTLIB::OGT_F64 :
371 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
372 break;
373 case ISD::SETUGT:
374 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
375 (VT == MVT::f64) ? RTLIB::OLE_F64 :
376 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
377 break;
378 case ISD::SETUGE:
379 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
380 (VT == MVT::f64) ? RTLIB::OLT_F64 :
381 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
382 break;
383 default: llvm_unreachable("Do not know how to soften this setcc!");
384 }
385 }
386
387 // Use the target specific return value for comparions lib calls.
388 EVT RetVT = getCmpLibcallReturnType();
389 SDValue Ops[2] = {NewLHS, NewRHS};
390 TargetLowering::MakeLibCallOptions CallOptions;
391 EVT OpsVT[2] = { OldLHS.getValueType(),
392 OldRHS.getValueType() };
393 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
394 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
395 NewLHS = Call.first;
396 NewRHS = DAG.getConstant(0, dl, RetVT);
397
398 CCCode = getCmpLibcallCC(LC1);
399 if (ShouldInvertCC) {
400 assert(RetVT.isInteger());
401 CCCode = getSetCCInverse(CCCode, RetVT);
402 }
403
404 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
405 // Update Chain.
406 Chain = Call.second;
407 } else {
408 EVT SetCCVT =
409 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
410 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
411 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
412 CCCode = getCmpLibcallCC(LC2);
413 if (ShouldInvertCC)
414 CCCode = getSetCCInverse(CCCode, RetVT);
415 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
416 if (Chain)
417 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
418 Call2.second);
419 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
420 Tmp.getValueType(), Tmp, NewLHS);
421 NewRHS = SDValue();
422 }
423}
424
425/// Return the entry encoding for a jump table in the current function. The
426/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
427unsigned TargetLowering::getJumpTableEncoding() const {
428 // In non-pic modes, just use the address of a block.
429 if (!isPositionIndependent())
430 return MachineJumpTableInfo::EK_BlockAddress;
431
432 // In PIC mode, if the target supports a GPRel32 directive, use it.
433 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
434 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
435
436 // Otherwise, use a label difference.
437 return MachineJumpTableInfo::EK_LabelDifference32;
438}
439
440SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
441 SelectionDAG &DAG) const {
442 // If our PIC model is GP relative, use the global offset table as the base.
443 unsigned JTEncoding = getJumpTableEncoding();
444
445 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
446 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
447 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
448
449 return Table;
450}
451
452/// This returns the relocation base for the given PIC jumptable, the same as
453/// getPICJumpTableRelocBase, but as an MCExpr.
454const MCExpr *
455TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
456 unsigned JTI,MCContext &Ctx) const{
457 // The normal PIC reloc base is the label at the start of the jump table.
458 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
459}
460
461bool
462TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
463 const TargetMachine &TM = getTargetMachine();
464 const GlobalValue *GV = GA->getGlobal();
465
466 // If the address is not even local to this DSO we will have to load it from
467 // a got and then add the offset.
468 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
469 return false;
470
471 // If the code is position independent we will have to add a base register.
472 if (isPositionIndependent())
473 return false;
474
475 // Otherwise we can do it.
476 return true;
477}
478
479//===----------------------------------------------------------------------===//
480// Optimization Methods
481//===----------------------------------------------------------------------===//
482
483/// If the specified instruction has a constant integer operand and there are
484/// bits set in that constant that are not demanded, then clear those bits and
485/// return true.
486bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
487 const APInt &DemandedBits,
488 const APInt &DemandedElts,
489 TargetLoweringOpt &TLO) const {
490 SDLoc DL(Op);
491 unsigned Opcode = Op.getOpcode();
492
493 // Do target-specific constant optimization.
494 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
495 return TLO.New.getNode();
496
497 // FIXME: ISD::SELECT, ISD::SELECT_CC
498 switch (Opcode) {
499 default:
500 break;
501 case ISD::XOR:
502 case ISD::AND:
503 case ISD::OR: {
504 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
505 if (!Op1C)
506 return false;
507
508 // If this is a 'not' op, don't touch it because that's a canonical form.
509 const APInt &C = Op1C->getAPIntValue();
510 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
511 return false;
512
513 if (!C.isSubsetOf(DemandedBits)) {
514 EVT VT = Op.getValueType();
515 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
516 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
517 return TLO.CombineTo(Op, NewOp);
518 }
519
520 break;
521 }
522 }
523
524 return false;
525}
526
527bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
528 const APInt &DemandedBits,
529 TargetLoweringOpt &TLO) const {
530 EVT VT = Op.getValueType();
531 APInt DemandedElts = VT.isVector()
532 ? APInt::getAllOnesValue(VT.getVectorNumElements())
533 : APInt(1, 1);
534 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
535}
536
537/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
538/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
539/// generalized for targets with other types of implicit widening casts.
540bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
541 const APInt &Demanded,
542 TargetLoweringOpt &TLO) const {
543 assert(Op.getNumOperands() == 2 &&
544 "ShrinkDemandedOp only supports binary operators!");
545 assert(Op.getNode()->getNumValues() == 1 &&
546 "ShrinkDemandedOp only supports nodes with one result!");
547
548 SelectionDAG &DAG = TLO.DAG;
549 SDLoc dl(Op);
550
551 // Early return, as this function cannot handle vector types.
552 if (Op.getValueType().isVector())
553 return false;
554
555 // Don't do this if the node has another user, which may require the
556 // full value.
557 if (!Op.getNode()->hasOneUse())
558 return false;
559
560 // Search for the smallest integer type with free casts to and from
561 // Op's type. For expedience, just check power-of-2 integer types.
562 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
563 unsigned DemandedSize = Demanded.getActiveBits();
564 unsigned SmallVTBits = DemandedSize;
565 if (!isPowerOf2_32(SmallVTBits))
566 SmallVTBits = NextPowerOf2(SmallVTBits);
567 for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
568 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
569 if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
570 TLI.isZExtFree(SmallVT, Op.getValueType())) {
571 // We found a type with free casts.
572 SDValue X = DAG.getNode(
573 Op.getOpcode(), dl, SmallVT,
574 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
575 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
576 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
577 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
578 return TLO.CombineTo(Op, Z);
579 }
580 }
581 return false;
582}
583
584bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
585 DAGCombinerInfo &DCI) const {
586 SelectionDAG &DAG = DCI.DAG;
587 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
588 !DCI.isBeforeLegalizeOps());
589 KnownBits Known;
590
591 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
592 if (Simplified) {
593 DCI.AddToWorklist(Op.getNode());
594 DCI.CommitTargetLoweringOpt(TLO);
595 }
596 return Simplified;
597}
598
599bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
600 KnownBits &Known,
601 TargetLoweringOpt &TLO,
602 unsigned Depth,
603 bool AssumeSingleUse) const {
604 EVT VT = Op.getValueType();
605
606 // TODO: We can probably do more work on calculating the known bits and
607 // simplifying the operations for scalable vectors, but for now we just
608 // bail out.
609 if (VT.isScalableVector()) {
610 // Pretend we don't know anything for now.
611 Known = KnownBits(DemandedBits.getBitWidth());
612 return false;
613 }
614
615 APInt DemandedElts = VT.isVector()
616 ? APInt::getAllOnesValue(VT.getVectorNumElements())
617 : APInt(1, 1);
618 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
619 AssumeSingleUse);
620}
621
622// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
623// TODO: Under what circumstances can we create nodes? Constant folding?
624SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
625 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
626 SelectionDAG &DAG, unsigned Depth) const {
627 // Limit search depth.
628 if (Depth >= SelectionDAG::MaxRecursionDepth)
629 return SDValue();
630
631 // Ignore UNDEFs.
632 if (Op.isUndef())
633 return SDValue();
634
635 // Not demanding any bits/elts from Op.
636 if (DemandedBits == 0 || DemandedElts == 0)
637 return DAG.getUNDEF(Op.getValueType());
638
639 unsigned NumElts = DemandedElts.getBitWidth();
640 unsigned BitWidth = DemandedBits.getBitWidth();
641 KnownBits LHSKnown, RHSKnown;
642 switch (Op.getOpcode()) {
643 case ISD::BITCAST: {
644 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
645 EVT SrcVT = Src.getValueType();
646 EVT DstVT = Op.getValueType();
647 if (SrcVT == DstVT)
648 return Src;
649
650 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
651 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
652 if (NumSrcEltBits == NumDstEltBits)
653 if (SDValue V = SimplifyMultipleUseDemandedBits(
654 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
655 return DAG.getBitcast(DstVT, V);
656
657 // TODO - bigendian once we have test coverage.
658 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
659 DAG.getDataLayout().isLittleEndian()) {
660 unsigned Scale = NumDstEltBits / NumSrcEltBits;
661 unsigned NumSrcElts = SrcVT.getVectorNumElements();
662 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
663 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
664 for (unsigned i = 0; i != Scale; ++i) {
665 unsigned Offset = i * NumSrcEltBits;
666 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
667 if (!Sub.isNullValue()) {
668 DemandedSrcBits |= Sub;
669 for (unsigned j = 0; j != NumElts; ++j)
670 if (DemandedElts[j])
671 DemandedSrcElts.setBit((j * Scale) + i);
672 }
673 }
674
675 if (SDValue V = SimplifyMultipleUseDemandedBits(
676 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
677 return DAG.getBitcast(DstVT, V);
678 }
679
680 // TODO - bigendian once we have test coverage.
681 if ((NumSrcEltBits % NumDstEltBits) == 0 &&
682 DAG.getDataLayout().isLittleEndian()) {
683 unsigned Scale = NumSrcEltBits / NumDstEltBits;
684 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
685 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
686 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
687 for (unsigned i = 0; i != NumElts; ++i)
688 if (DemandedElts[i]) {
689 unsigned Offset = (i % Scale) * NumDstEltBits;
690 DemandedSrcBits.insertBits(DemandedBits, Offset);
691 DemandedSrcElts.setBit(i / Scale);
692 }
693
694 if (SDValue V = SimplifyMultipleUseDemandedBits(
695 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
696 return DAG.getBitcast(DstVT, V);
697 }
698
699 break;
700 }
701 case ISD::AND: {
702 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
703 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
704
705 // If all of the demanded bits are known 1 on one side, return the other.
706 // These bits cannot contribute to the result of the 'and' in this
707 // context.
708 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
709 return Op.getOperand(0);
710 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
711 return Op.getOperand(1);
712 break;
713 }
714 case ISD::OR: {
715 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
716 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
717
718 // If all of the demanded bits are known zero on one side, return the
719 // other. These bits cannot contribute to the result of the 'or' in this
720 // context.
721 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
722 return Op.getOperand(0);
723 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
724 return Op.getOperand(1);
725 break;
726 }
727 case ISD::XOR: {
728 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
729 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
730
731 // If all of the demanded bits are known zero on one side, return the
732 // other.
733 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
734 return Op.getOperand(0);
735 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
736 return Op.getOperand(1);
737 break;
738 }
739 case ISD::SHL: {
740 // If we are only demanding sign bits then we can use the shift source
741 // directly.
742 if (const APInt *MaxSA =
743 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
744 SDValue Op0 = Op.getOperand(0);
745 unsigned ShAmt = MaxSA->getZExtValue();
746 unsigned NumSignBits =
747 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
748 unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
749 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
750 return Op0;
751 }
752 break;
753 }
754 case ISD::SETCC: {
755 SDValue Op0 = Op.getOperand(0);
756 SDValue Op1 = Op.getOperand(1);
757 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
758 // If (1) we only need the sign-bit, (2) the setcc operands are the same
759 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
760 // -1, we may be able to bypass the setcc.
761 if (DemandedBits.isSignMask() &&
762 Op0.getScalarValueSizeInBits() == BitWidth &&
763 getBooleanContents(Op0.getValueType()) ==
764 BooleanContent::ZeroOrNegativeOneBooleanContent) {
765 // If we're testing X < 0, then this compare isn't needed - just use X!
766 // FIXME: We're limiting to integer types here, but this should also work
767 // if we don't care about FP signed-zero. The use of SETLT with FP means
768 // that we don't care about NaNs.
769 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
770 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
771 return Op0;
772 }
773 break;
774 }
775 case ISD::SIGN_EXTEND_INREG: {
776 // If none of the extended bits are demanded, eliminate the sextinreg.
777 SDValue Op0 = Op.getOperand(0);
778 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
779 unsigned ExBits = ExVT.getScalarSizeInBits();
780 if (DemandedBits.getActiveBits() <= ExBits)
781 return Op0;
782 // If the input is already sign extended, just drop the extension.
783 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
784 if (NumSignBits >= (BitWidth - ExBits + 1))
785 return Op0;
786 break;
787 }
788 case ISD::ANY_EXTEND_VECTOR_INREG:
789 case ISD::SIGN_EXTEND_VECTOR_INREG:
790 case ISD::ZERO_EXTEND_VECTOR_INREG: {
791 // If we only want the lowest element and none of extended bits, then we can
792 // return the bitcasted source vector.
793 SDValue Src = Op.getOperand(0);
794 EVT SrcVT = Src.getValueType();
795 EVT DstVT = Op.getValueType();
796 if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
797 DAG.getDataLayout().isLittleEndian() &&
798 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
799 return DAG.getBitcast(DstVT, Src);
800 }
801 break;
802 }
803 case ISD::INSERT_VECTOR_ELT: {
804 // If we don't demand the inserted element, return the base vector.
805 SDValue Vec = Op.getOperand(0);
806 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
807 EVT VecVT = Vec.getValueType();
808 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
809 !DemandedElts[CIdx->getZExtValue()])
810 return Vec;
811 break;
812 }
813 case ISD::INSERT_SUBVECTOR: {
814 // If we don't demand the inserted subvector, return the base vector.
815 SDValue Vec = Op.getOperand(0);
816 SDValue Sub = Op.getOperand(1);
817 uint64_t Idx = Op.getConstantOperandVal(2);
818 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
819 if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
820 return Vec;
821 break;
822 }
823 case ISD::VECTOR_SHUFFLE: {
824 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
825
826 // If all the demanded elts are from one operand and are inline,
827 // then we can use the operand directly.
828 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
829 for (unsigned i = 0; i != NumElts; ++i) {
830 int M = ShuffleMask[i];
831 if (M < 0 || !DemandedElts[i])
832 continue;
833 AllUndef = false;
834 IdentityLHS &= (M == (int)i);
835 IdentityRHS &= ((M - NumElts) == i);
836 }
837
838 if (AllUndef)
839 return DAG.getUNDEF(Op.getValueType());
840 if (IdentityLHS)
841 return Op.getOperand(0);
842 if (IdentityRHS)
843 return Op.getOperand(1);
844 break;
845 }
846 default:
847 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
848 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
849 Op, DemandedBits, DemandedElts, DAG, Depth))
850 return V;
851 break;
852 }
853 return SDValue();
854}
855
856SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
857 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
858 unsigned Depth) const {
859 EVT VT = Op.getValueType();
860 APInt DemandedElts = VT.isVector()
861 ? APInt::getAllOnesValue(VT.getVectorNumElements())
862 : APInt(1, 1);
863 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
864 Depth);
865}
866
867SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
868 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
869 unsigned Depth) const {
870 APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
871 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
872 Depth);
873}
874
875/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
876/// result of Op are ever used downstream. If we can use this information to
877/// simplify Op, create a new simplified DAG node and return true, returning the
878/// original and new nodes in Old and New. Otherwise, analyze the expression and
879/// return a mask of Known bits for the expression (used to simplify the
880/// caller). The Known bits may only be accurate for those bits in the
881/// OriginalDemandedBits and OriginalDemandedElts.
882bool TargetLowering::SimplifyDemandedBits(
883 SDValue Op, const APInt &OriginalDemandedBits,
884 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
885 unsigned Depth, bool AssumeSingleUse) const {
886 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
887 assert(Op.getScalarValueSizeInBits() == BitWidth &&
888 "Mask size mismatches value type size!");
889
890 // Don't know anything.
891 Known = KnownBits(BitWidth);
892
893 // TODO: We can probably do more work on calculating the known bits and
894 // simplifying the operations for scalable vectors, but for now we just
895 // bail out.
896 if (Op.getValueType().isScalableVector())
897 return false;
898
899 unsigned NumElts = OriginalDemandedElts.getBitWidth();
900 assert((!Op.getValueType().isVector() ||
901 NumElts == Op.getValueType().getVectorNumElements()) &&
902 "Unexpected vector size");
903
904 APInt DemandedBits = OriginalDemandedBits;
905 APInt DemandedElts = OriginalDemandedElts;
906 SDLoc dl(Op);
907 auto &DL = TLO.DAG.getDataLayout();
908
909 // Undef operand.
910 if (Op.isUndef())
911 return false;
912
913 if (Op.getOpcode() == ISD::Constant) {
914 // We know all of the bits for a constant!
915 Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
916 return false;
917 }
918
919 if (Op.getOpcode() == ISD::ConstantFP) {
920 // We know all of the bits for a floating point constant!
921 Known = KnownBits::makeConstant(
922 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
923 return false;
924 }
925
926 // Other users may use these bits.
927 EVT VT = Op.getValueType();
928 if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
929 if (Depth != 0) {
930 // If not at the root, Just compute the Known bits to
931 // simplify things downstream.
932 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
933 return false;
934 }
935 // If this is the root being simplified, allow it to have multiple uses,
936 // just set the DemandedBits/Elts to all bits.
937 DemandedBits = APInt::getAllOnesValue(BitWidth);
938 DemandedElts = APInt::getAllOnesValue(NumElts);
939 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
940 // Not demanding any bits/elts from Op.
941 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
942 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
943 // Limit search depth.
944 return false;
945 }
946
947 KnownBits Known2;
948 switch (Op.getOpcode()) {
949 case ISD::TargetConstant:
950 llvm_unreachable("Can't simplify this node");
951 case ISD::SCALAR_TO_VECTOR: {
952 if (!DemandedElts[0])
953 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
954
955 KnownBits SrcKnown;
956 SDValue Src = Op.getOperand(0);
957 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
958 APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
959 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
960 return true;
961
962 // Upper elements are undef, so only get the knownbits if we just demand
963 // the bottom element.
964 if (DemandedElts == 1)
965 Known = SrcKnown.anyextOrTrunc(BitWidth);
966 break;
967 }
968 case ISD::BUILD_VECTOR:
969 // Collect the known bits that are shared by every demanded element.
970 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
971 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
972 return false; // Don't fall through, will infinitely loop.
973 case ISD::LOAD: {
974 LoadSDNode *LD = cast<LoadSDNode>(Op);
975 if (getTargetConstantFromLoad(LD)) {
976 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
977 return false; // Don't fall through, will infinitely loop.
978 } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
979 // If this is a ZEXTLoad and we are looking at the loaded value.
980 EVT MemVT = LD->getMemoryVT();
981 unsigned MemBits = MemVT.getScalarSizeInBits();
982 Known.Zero.setBitsFrom(MemBits);
983 return false; // Don't fall through, will infinitely loop.
984 }
985 break;
986 }
987 case ISD::INSERT_VECTOR_ELT: {
988 SDValue Vec = Op.getOperand(0);
989 SDValue Scl = Op.getOperand(1);
990 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
991 EVT VecVT = Vec.getValueType();
992
993 // If index isn't constant, assume we need all vector elements AND the
994 // inserted element.
995 APInt DemandedVecElts(DemandedElts);
996 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
997 unsigned Idx = CIdx->getZExtValue();
998 DemandedVecElts.clearBit(Idx);
999
1000 // Inserted element is not required.
1001 if (!DemandedElts[Idx])
1002 return TLO.CombineTo(Op, Vec);
1003 }
1004
1005 KnownBits KnownScl;
1006 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1007 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1008 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1009 return true;
1010
1011 Known = KnownScl.anyextOrTrunc(BitWidth);
1012
1013 KnownBits KnownVec;
1014 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1015 Depth + 1))
1016 return true;
1017
1018 if (!!DemandedVecElts)
1019 Known = KnownBits::commonBits(Known, KnownVec);
1020
1021 return false;
1022 }
1023 case ISD::INSERT_SUBVECTOR: {
1024 // Demand any elements from the subvector and the remainder from the src its
1025 // inserted into.
1026 SDValue Src = Op.getOperand(0);
1027 SDValue Sub = Op.getOperand(1);
1028 uint64_t Idx = Op.getConstantOperandVal(2);
1029 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1030 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1031 APInt DemandedSrcElts = DemandedElts;
1032 DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
1033
1034 KnownBits KnownSub, KnownSrc;
1035 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1036 Depth + 1))
1037 return true;
1038 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1039 Depth + 1))
1040 return true;
1041
1042 Known.Zero.setAllBits();
1043 Known.One.setAllBits();
1044 if (!!DemandedSubElts)
1045 Known = KnownBits::commonBits(Known, KnownSub);
1046 if (!!DemandedSrcElts)
1047 Known = KnownBits::commonBits(Known, KnownSrc);
1048
1049 // Attempt to avoid multi-use src if we don't need anything from it.
1050 if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
1051 !DemandedSrcElts.isAllOnesValue()) {
1052 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1053 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1054 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1055 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1056 if (NewSub || NewSrc) {
1057 NewSub = NewSub ? NewSub : Sub;
1058 NewSrc = NewSrc ? NewSrc : Src;
1059 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1060 Op.getOperand(2));
1061 return TLO.CombineTo(Op, NewOp);
1062 }
1063 }
1064 break;
1065 }
1066 case ISD::EXTRACT_SUBVECTOR: {
1067 // Offset the demanded elts by the subvector index.
1068 SDValue Src = Op.getOperand(0);
1069 if (Src.getValueType().isScalableVector())
1070 break;
1071 uint64_t Idx = Op.getConstantOperandVal(1);
1072 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1073 APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1074
1075 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1076 Depth + 1))
1077 return true;
1078
1079 // Attempt to avoid multi-use src if we don't need anything from it.
1080 if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
1081 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1082 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1083 if (DemandedSrc) {
1084 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1085 Op.getOperand(1));
1086 return TLO.CombineTo(Op, NewOp);
1087 }
1088 }
1089 break;
1090 }
1091 case ISD::CONCAT_VECTORS: {
1092 Known.Zero.setAllBits();
1093 Known.One.setAllBits();
1094 EVT SubVT = Op.getOperand(0).getValueType();
1095 unsigned NumSubVecs = Op.getNumOperands();
1096 unsigned NumSubElts = SubVT.getVectorNumElements();
1097 for (unsigned i = 0; i != NumSubVecs; ++i) {
1098 APInt DemandedSubElts =
1099 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1100 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1101 Known2, TLO, Depth + 1))
1102 return true;
1103 // Known bits are shared by every demanded subvector element.
1104 if (!!DemandedSubElts)
1105 Known = KnownBits::commonBits(Known, Known2);
1106 }
1107 break;
1108 }
1109 case ISD::VECTOR_SHUFFLE: {
1110 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1111
1112 // Collect demanded elements from shuffle operands..
1113 APInt DemandedLHS(NumElts, 0);
1114 APInt DemandedRHS(NumElts, 0);
1115 for (unsigned i = 0; i != NumElts; ++i) {
1116 if (!DemandedElts[i])
1117 continue;
1118 int M = ShuffleMask[i];
1119 if (M < 0) {
1120 // For UNDEF elements, we don't know anything about the common state of
1121 // the shuffle result.
1122 DemandedLHS.clearAllBits();
1123 DemandedRHS.clearAllBits();
1124 break;
1125 }
1126 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1127 if (M < (int)NumElts)
1128 DemandedLHS.setBit(M);
1129 else
1130 DemandedRHS.setBit(M - NumElts);
1131 }
1132
1133 if (!!DemandedLHS || !!DemandedRHS) {
1134 SDValue Op0 = Op.getOperand(0);
1135 SDValue Op1 = Op.getOperand(1);
1136
1137 Known.Zero.setAllBits();
1138 Known.One.setAllBits();
1139 if (!!DemandedLHS) {
1140 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1141 Depth + 1))
1142 return true;
1143 Known = KnownBits::commonBits(Known, Known2);
1144 }
1145 if (!!DemandedRHS) {
1146 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1147 Depth + 1))
1148 return true;
1149 Known = KnownBits::commonBits(Known, Known2);
1150 }
1151
1152 // Attempt to avoid multi-use ops if we don't need anything from them.
1153 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1154 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1155 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1156 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1157 if (DemandedOp0 || DemandedOp1) {
1158 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1159 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1160 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1161 return TLO.CombineTo(Op, NewOp);
1162 }
1163 }
1164 break;
1165 }
1166 case ISD::AND: {
1167 SDValue Op0 = Op.getOperand(0);
1168 SDValue Op1 = Op.getOperand(1);
1169
1170 // If the RHS is a constant, check to see if the LHS would be zero without
1171 // using the bits from the RHS. Below, we use knowledge about the RHS to
1172 // simplify the LHS, here we're using information from the LHS to simplify
1173 // the RHS.
1174 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1175 // Do not increment Depth here; that can cause an infinite loop.
1176 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1177 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1178 if ((LHSKnown.Zero & DemandedBits) ==
1179 (~RHSC->getAPIntValue() & DemandedBits))
1180 return TLO.CombineTo(Op, Op0);
1181
1182 // If any of the set bits in the RHS are known zero on the LHS, shrink
1183 // the constant.
1184 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1185 DemandedElts, TLO))
1186 return true;
1187
1188 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1189 // constant, but if this 'and' is only clearing bits that were just set by
1190 // the xor, then this 'and' can be eliminated by shrinking the mask of
1191 // the xor. For example, for a 32-bit X:
1192 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1193 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1194 LHSKnown.One == ~RHSC->getAPIntValue()) {
1195 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1196 return TLO.CombineTo(Op, Xor);
1197 }
1198 }
1199
1200 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1201 Depth + 1))
1202 return true;
1203 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1204 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1205 Known2, TLO, Depth + 1))
1206 return true;
1207 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1208
1209 // Attempt to avoid multi-use ops if we don't need anything from them.
1210 if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1211 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1212 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1213 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1214 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1215 if (DemandedOp0 || DemandedOp1) {
1216 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1217 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1218 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1219 return TLO.CombineTo(Op, NewOp);
1220 }
1221 }
1222
1223 // If all of the demanded bits are known one on one side, return the other.
1224 // These bits cannot contribute to the result of the 'and'.
1225 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1226 return TLO.CombineTo(Op, Op0);
1227 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1228 return TLO.CombineTo(Op, Op1);
1229 // If all of the demanded bits in the inputs are known zeros, return zero.
1230 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1231 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1232 // If the RHS is a constant, see if we can simplify it.
1233 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1234 TLO))
1235 return true;
1236 // If the operation can be done in a smaller type, do so.
1237 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1238 return true;
1239
1240 Known &= Known2;
1241 break;
1242 }
1243 case ISD::OR: {
1244 SDValue Op0 = Op.getOperand(0);
1245 SDValue Op1 = Op.getOperand(1);
1246
1247 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1248 Depth + 1))
1249 return true;
1250 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1251 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1252 Known2, TLO, Depth + 1))
1253 return true;
1254 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1255
1256 // Attempt to avoid multi-use ops if we don't need anything from them.
1257 if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1258 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1259 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1260 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1261 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1262 if (DemandedOp0 || DemandedOp1) {
1263 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1264 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1265 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1266 return TLO.CombineTo(Op, NewOp);
1267 }
1268 }
1269
1270 // If all of the demanded bits are known zero on one side, return the other.
1271 // These bits cannot contribute to the result of the 'or'.
1272 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1273 return TLO.CombineTo(Op, Op0);
1274 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1275 return TLO.CombineTo(Op, Op1);
1276 // If the RHS is a constant, see if we can simplify it.
1277 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1278 return true;
1279 // If the operation can be done in a smaller type, do so.
1280 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1281 return true;
1282
1283 Known |= Known2;
1284 break;
1285 }
1286 case ISD::XOR: {
1287 SDValue Op0 = Op.getOperand(0);
1288 SDValue Op1 = Op.getOperand(1);
1289
1290 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1291 Depth + 1))
1292 return true;
1293 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1294 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1295 Depth + 1))
1296 return true;
1297 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1298
1299 // Attempt to avoid multi-use ops if we don't need anything from them.
1300 if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1301 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1302 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1303 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1304 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1305 if (DemandedOp0 || DemandedOp1) {
1306 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1307 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1308 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1309 return TLO.CombineTo(Op, NewOp);
1310 }
1311 }
1312
1313 // If all of the demanded bits are known zero on one side, return the other.
1314 // These bits cannot contribute to the result of the 'xor'.
1315 if (DemandedBits.isSubsetOf(Known.Zero))
1316 return TLO.CombineTo(Op, Op0);
1317 if (DemandedBits.isSubsetOf(Known2.Zero))
1318 return TLO.CombineTo(Op, Op1);
1319 // If the operation can be done in a smaller type, do so.
1320 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1321 return true;
1322
1323 // If all of the unknown bits are known to be zero on one side or the other
1324 // turn this into an *inclusive* or.
1325 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1326 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1327 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1328
1329 ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
1330 if (C) {
1331 // If one side is a constant, and all of the set bits in the constant are
1332 // also known set on the other side, turn this into an AND, as we know
1333 // the bits will be cleared.
1334 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1335 // NB: it is okay if more bits are known than are requested
1336 if (C->getAPIntValue() == Known2.One) {
1337 SDValue ANDC =
1338 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1339 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1340 }
1341
1342 // If the RHS is a constant, see if we can change it. Don't alter a -1
1343 // constant because that's a 'not' op, and that is better for combining
1344 // and codegen.
1345 if (!C->isAllOnesValue() &&
1346 DemandedBits.isSubsetOf(C->getAPIntValue())) {
1347 // We're flipping all demanded bits. Flip the undemanded bits too.
1348 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1349 return TLO.CombineTo(Op, New);
1350 }
1351 }
1352
1353 // If we can't turn this into a 'not', try to shrink the constant.
1354 if (!C || !C->isAllOnesValue())
1355 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1356 return true;
1357
1358 Known ^= Known2;
1359 break;
1360 }
1361 case ISD::SELECT:
1362 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1363 Depth + 1))
1364 return true;
1365 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1366 Depth + 1))
1367 return true;
1368 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1369 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1370
1371 // If the operands are constants, see if we can simplify them.
1372 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1373 return true;
1374
1375 // Only known if known in both the LHS and RHS.
1376 Known = KnownBits::commonBits(Known, Known2);
1377 break;
1378 case ISD::SELECT_CC:
1379 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1380 Depth + 1))
1381 return true;
1382 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1383 Depth + 1))
1384 return true;
1385 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1386 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1387
1388 // If the operands are constants, see if we can simplify them.
1389 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1390 return true;
1391
1392 // Only known if known in both the LHS and RHS.
1393 Known = KnownBits::commonBits(Known, Known2);
1394 break;
1395 case ISD::SETCC: {
1396 SDValue Op0 = Op.getOperand(0);
1397 SDValue Op1 = Op.getOperand(1);
1398 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1399 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1400 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1401 // -1, we may be able to bypass the setcc.
1402 if (DemandedBits.isSignMask() &&
1403 Op0.getScalarValueSizeInBits() == BitWidth &&
1404 getBooleanContents(Op0.getValueType()) ==
1405 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1406 // If we're testing X < 0, then this compare isn't needed - just use X!
1407 // FIXME: We're limiting to integer types here, but this should also work
1408 // if we don't care about FP signed-zero. The use of SETLT with FP means
1409 // that we don't care about NaNs.
1410 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1411 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1412 return TLO.CombineTo(Op, Op0);
1413
1414 // TODO: Should we check for other forms of sign-bit comparisons?
1415 // Examples: X <= -1, X >= 0
1416 }
1417 if (getBooleanContents(Op0.getValueType()) ==
1418 TargetLowering::ZeroOrOneBooleanContent &&
1419 BitWidth > 1)
1420 Known.Zero.setBitsFrom(1);
1421 break;
1422 }
1423 case ISD::SHL: {
1424 SDValue Op0 = Op.getOperand(0);
1425 SDValue Op1 = Op.getOperand(1);
1426 EVT ShiftVT = Op1.getValueType();
1427
1428 if (const APInt *SA =
1429 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1430 unsigned ShAmt = SA->getZExtValue();
1431 if (ShAmt == 0)
1432 return TLO.CombineTo(Op, Op0);
1433
1434 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1435 // single shift. We can do this if the bottom bits (which are shifted
1436 // out) are never demanded.
1437 // TODO - support non-uniform vector amounts.
1438 if (Op0.getOpcode() == ISD::SRL) {
1439 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1440 if (const APInt *SA2 =
1441 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1442 unsigned C1 = SA2->getZExtValue();
1443 unsigned Opc = ISD::SHL;
1444 int Diff = ShAmt - C1;
1445 if (Diff < 0) {
1446 Diff = -Diff;
1447 Opc = ISD::SRL;
1448 }
1449 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1450 return TLO.CombineTo(
1451 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1452 }
1453 }
1454 }
1455
1456 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1457 // are not demanded. This will likely allow the anyext to be folded away.
1458 // TODO - support non-uniform vector amounts.
1459 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1460 SDValue InnerOp = Op0.getOperand(0);
1461 EVT InnerVT = InnerOp.getValueType();
1462 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1463 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1464 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1465 EVT ShTy = getShiftAmountTy(InnerVT, DL);
1466 if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1467 ShTy = InnerVT;
1468 SDValue NarrowShl =
1469 TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1470 TLO.DAG.getConstant(ShAmt, dl, ShTy));
1471 return TLO.CombineTo(
1472 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1473 }
1474
1475 // Repeat the SHL optimization above in cases where an extension
1476 // intervenes: (shl (anyext (shr x, c1)), c2) to
1477 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1478 // aren't demanded (as above) and that the shifted upper c1 bits of
1479 // x aren't demanded.
1480 // TODO - support non-uniform vector amounts.
1481 if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1482 InnerOp.hasOneUse()) {
1483 if (const APInt *SA2 =
1484 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1485 unsigned InnerShAmt = SA2->getZExtValue();
1486 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1487 DemandedBits.getActiveBits() <=
1488 (InnerBits - InnerShAmt + ShAmt) &&
1489 DemandedBits.countTrailingZeros() >= ShAmt) {
1490 SDValue NewSA =
1491 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1492 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1493 InnerOp.getOperand(0));
1494 return TLO.CombineTo(
1495 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1496 }
1497 }
1498 }
1499 }
1500
1501 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1502 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1503 Depth + 1))
1504 return true;
1505 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1506 Known.Zero <<= ShAmt;
1507 Known.One <<= ShAmt;
1508 // low bits known zero.
1509 Known.Zero.setLowBits(ShAmt);
1510
1511 // Try shrinking the operation as long as the shift amount will still be
1512 // in range.
1513 if ((ShAmt < DemandedBits.getActiveBits()) &&
1514 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1515 return true;
1516 }
1517
1518 // If we are only demanding sign bits then we can use the shift source
1519 // directly.
1520 if (const APInt *MaxSA =
1521 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1522 unsigned ShAmt = MaxSA->getZExtValue();
1523 unsigned NumSignBits =
1524 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1525 unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1526 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1527 return TLO.CombineTo(Op, Op0);
1528 }
1529 break;
1530 }
1531 case ISD::SRL: {
1532 SDValue Op0 = Op.getOperand(0);
1533 SDValue Op1 = Op.getOperand(1);
1534 EVT ShiftVT = Op1.getValueType();
1535
1536 if (const APInt *SA =
1537 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1538 unsigned ShAmt = SA->getZExtValue();
1539 if (ShAmt == 0)
1540 return TLO.CombineTo(Op, Op0);
1541
1542 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1543 // single shift. We can do this if the top bits (which are shifted out)
1544 // are never demanded.
1545 // TODO - support non-uniform vector amounts.
1546 if (Op0.getOpcode() == ISD::SHL) {
1547 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1548 if (const APInt *SA2 =
1549 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1550 unsigned C1 = SA2->getZExtValue();
1551 unsigned Opc = ISD::SRL;
1552 int Diff = ShAmt - C1;
1553 if (Diff < 0) {
1554 Diff = -Diff;
1555 Opc = ISD::SHL;
1556 }
1557 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1558 return TLO.CombineTo(
1559 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1560 }
1561 }
1562 }
1563
1564 APInt InDemandedMask = (DemandedBits << ShAmt);
1565
1566 // If the shift is exact, then it does demand the low bits (and knows that
1567 // they are zero).
1568 if (Op->getFlags().hasExact())
1569 InDemandedMask.setLowBits(ShAmt);
1570
1571 // Compute the new bits that are at the top now.
1572 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1573 Depth + 1))
1574 return true;
1575 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1576 Known.Zero.lshrInPlace(ShAmt);
1577 Known.One.lshrInPlace(ShAmt);
1578 // High bits known zero.
1579 Known.Zero.setHighBits(ShAmt);
1580 }
1581 break;
1582 }
1583 case ISD::SRA: {
1584 SDValue Op0 = Op.getOperand(0);
1585 SDValue Op1 = Op.getOperand(1);
1586 EVT ShiftVT = Op1.getValueType();
1587
1588 // If we only want bits that already match the signbit then we don't need
1589 // to shift.
1590 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1591 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1592 NumHiDemandedBits)
1593 return TLO.CombineTo(Op, Op0);
1594
1595 // If this is an arithmetic shift right and only the low-bit is set, we can
1596 // always convert this into a logical shr, even if the shift amount is
1597 // variable. The low bit of the shift cannot be an input sign bit unless
1598 // the shift amount is >= the size of the datatype, which is undefined.
1599 if (DemandedBits.isOneValue())
1600 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1601
1602 if (const APInt *SA =
1603 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1604 unsigned ShAmt = SA->getZExtValue();
1605 if (ShAmt == 0)
1606 return TLO.CombineTo(Op, Op0);
1607
1608 APInt InDemandedMask = (DemandedBits << ShAmt);
1609
1610 // If the shift is exact, then it does demand the low bits (and knows that
1611 // they are zero).
1612 if (Op->getFlags().hasExact())
1613 InDemandedMask.setLowBits(ShAmt);
1614
1615 // If any of the demanded bits are produced by the sign extension, we also
1616 // demand the input sign bit.
1617 if (DemandedBits.countLeadingZeros() < ShAmt)
1618 InDemandedMask.setSignBit();
1619
1620 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1621 Depth + 1))
1622 return true;
1623 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1624 Known.Zero.lshrInPlace(ShAmt);
1625 Known.One.lshrInPlace(ShAmt);
1626
1627 // If the input sign bit is known to be zero, or if none of the top bits
1628 // are demanded, turn this into an unsigned shift right.
1629 if (Known.Zero[BitWidth - ShAmt - 1] ||
1630 DemandedBits.countLeadingZeros() >= ShAmt) {
1631 SDNodeFlags Flags;
1632 Flags.setExact(Op->getFlags().hasExact());
1633 return TLO.CombineTo(
1634 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1635 }
1636
1637 int Log2 = DemandedBits.exactLogBase2();
1638 if (Log2 >= 0) {
1639 // The bit must come from the sign.
1640 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1641 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1642 }
1643
1644 if (Known.One[BitWidth - ShAmt - 1])
1645 // New bits are known one.
1646 Known.One.setHighBits(ShAmt);
1647
1648 // Attempt to avoid multi-use ops if we don't need anything from them.
1649 if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1650 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1651 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1652 if (DemandedOp0) {
1653 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1654 return TLO.CombineTo(Op, NewOp);
1655 }
1656 }
1657 }
1658 break;
1659 }
1660 case ISD::FSHL:
1661 case ISD::FSHR: {
1662 SDValue Op0 = Op.getOperand(0);
1663 SDValue Op1 = Op.getOperand(1);
1664 SDValue Op2 = Op.getOperand(2);
1665 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1666
1667 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1668 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1669
1670 // For fshl, 0-shift returns the 1st arg.
1671 // For fshr, 0-shift returns the 2nd arg.
1672 if (Amt == 0) {
1673 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1674 Known, TLO, Depth + 1))
1675 return true;
1676 break;
1677 }
1678
1679 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1680 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1681 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1682 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1683 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1684 Depth + 1))
1685 return true;
1686 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1687 Depth + 1))
1688 return true;
1689
1690 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1691 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1692 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1693 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1694 Known.One |= Known2.One;
1695 Known.Zero |= Known2.Zero;
1696 }
1697
1698 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1699 if (isPowerOf2_32(BitWidth)) {
1700 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
1701 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
1702 Known2, TLO, Depth + 1))
1703 return true;
1704 }
1705 break;
1706 }
1707 case ISD::ROTL:
1708 case ISD::ROTR: {
1709 SDValue Op0 = Op.getOperand(0);
1710 SDValue Op1 = Op.getOperand(1);
1711
1712 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
1713 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
1714 return TLO.CombineTo(Op, Op0);
1715
1716 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1717 if (isPowerOf2_32(BitWidth)) {
1718 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
1719 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
1720 Depth + 1))
1721 return true;
1722 }
1723 break;
1724 }
1725 case ISD::UMIN: {
1726 // Check if one arg is always less than (or equal) to the other arg.
1727 SDValue Op0 = Op.getOperand(0);
1728 SDValue Op1 = Op.getOperand(1);
1729 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1730 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1731 Known = KnownBits::umin(Known0, Known1);
1732 if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
1733 return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
1734 if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
1735 return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
1736 break;
1737 }
1738 case ISD::UMAX: {
1739 // Check if one arg is always greater than (or equal) to the other arg.
1740 SDValue Op0 = Op.getOperand(0);
1741 SDValue Op1 = Op.getOperand(1);
1742 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1743 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1744 Known = KnownBits::umax(Known0, Known1);
1745 if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
1746 return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
1747 if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
1748 return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
1749 break;
1750 }
1751 case ISD::BITREVERSE: {
1752 SDValue Src = Op.getOperand(0);
1753 APInt DemandedSrcBits = DemandedBits.reverseBits();
1754 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1755 Depth + 1))
1756 return true;
1757 Known.One = Known2.One.reverseBits();
1758 Known.Zero = Known2.Zero.reverseBits();
1759 break;
1760 }
1761 case ISD::BSWAP: {
1762 SDValue Src = Op.getOperand(0);
1763 APInt DemandedSrcBits = DemandedBits.byteSwap();
1764 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1765 Depth + 1))
1766 return true;
1767 Known.One = Known2.One.byteSwap();
1768 Known.Zero = Known2.Zero.byteSwap();
1769 break;
1770 }
1771 case ISD::CTPOP: {
1772 // If only 1 bit is demanded, replace with PARITY as long as we're before
1773 // op legalization.
1774 // FIXME: Limit to scalars for now.
1775 if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
1776 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
1777 Op.getOperand(0)));
1778
1779 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1780 break;
1781 }
1782 case ISD::SIGN_EXTEND_INREG: {
1783 SDValue Op0 = Op.getOperand(0);
1784 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1785 unsigned ExVTBits = ExVT.getScalarSizeInBits();
1786
1787 // If we only care about the highest bit, don't bother shifting right.
1788 if (DemandedBits.isSignMask()) {
1789 unsigned NumSignBits =
1790 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1791 bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1792 // However if the input is already sign extended we expect the sign
1793 // extension to be dropped altogether later and do not simplify.
1794 if (!AlreadySignExtended) {
1795 // Compute the correct shift amount type, which must be getShiftAmountTy
1796 // for scalar types after legalization.
1797 EVT ShiftAmtTy = VT;
1798 if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1799 ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1800
1801 SDValue ShiftAmt =
1802 TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1803 return TLO.CombineTo(Op,
1804 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1805 }
1806 }
1807
1808 // If none of the extended bits are demanded, eliminate the sextinreg.
1809 if (DemandedBits.getActiveBits() <= ExVTBits)
1810 return TLO.CombineTo(Op, Op0);
1811
1812 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1813
1814 // Since the sign extended bits are demanded, we know that the sign
1815 // bit is demanded.
1816 InputDemandedBits.setBit(ExVTBits - 1);
1817
1818 if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1819 return true;
1820 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1821
1822 // If the sign bit of the input is known set or clear, then we know the
1823 // top bits of the result.
1824
1825 // If the input sign bit is known zero, convert this into a zero extension.
1826 if (Known.Zero[ExVTBits - 1])
1827 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
1828
1829 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1830 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1831 Known.One.setBitsFrom(ExVTBits);
1832 Known.Zero &= Mask;
1833 } else { // Input sign bit unknown
1834 Known.Zero &= Mask;
1835 Known.One &= Mask;
1836 }
1837 break;
1838 }
1839 case ISD::BUILD_PAIR: {
1840 EVT HalfVT = Op.getOperand(0).getValueType();
1841 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1842
1843 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1844 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1845
1846 KnownBits KnownLo, KnownHi;
1847
1848 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1849 return true;
1850
1851 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1852 return true;
1853
1854 Known.Zero = KnownLo.Zero.zext(BitWidth) |
1855 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1856
1857 Known.One = KnownLo.One.zext(BitWidth) |
1858 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1859 break;
1860 }
1861 case ISD::ZERO_EXTEND:
1862 case ISD::ZERO_EXTEND_VECTOR_INREG: {
1863 SDValue Src = Op.getOperand(0);
1864 EVT SrcVT = Src.getValueType();
1865 unsigned InBits = SrcVT.getScalarSizeInBits();
1866 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1867 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1868
1869 // If none of the top bits are demanded, convert this into an any_extend.
1870 if (DemandedBits.getActiveBits() <= InBits) {
1871 // If we only need the non-extended bits of the bottom element
1872 // then we can just bitcast to the result.
1873 if (IsVecInReg && DemandedElts == 1 &&
1874 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1875 TLO.DAG.getDataLayout().isLittleEndian())
1876 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1877
1878 unsigned Opc =
1879 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1880 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1881 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1882 }
1883
1884 APInt InDemandedBits = DemandedBits.trunc(InBits);
1885 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1886 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1887 Depth + 1))
1888 return true;
1889 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1890 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1891 Known = Known.zext(BitWidth);
1892
1893 // Attempt to avoid multi-use ops if we don't need anything from them.
1894 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1895 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1896 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1897 break;
1898 }
1899 case ISD::SIGN_EXTEND:
1900 case ISD::SIGN_EXTEND_VECTOR_INREG: {
1901 SDValue Src = Op.getOperand(0);
1902 EVT SrcVT = Src.getValueType();
1903 unsigned InBits = SrcVT.getScalarSizeInBits();
1904 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1905 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1906
1907 // If none of the top bits are demanded, convert this into an any_extend.
1908 if (DemandedBits.getActiveBits() <= InBits) {
1909 // If we only need the non-extended bits of the bottom element
1910 // then we can just bitcast to the result.
1911 if (IsVecInReg && DemandedElts == 1 &&
1912 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1913 TLO.DAG.getDataLayout().isLittleEndian())
1914 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1915
1916 unsigned Opc =
1917 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1918 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1919 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1920 }
1921
1922 APInt InDemandedBits = DemandedBits.trunc(InBits);
1923 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1924
1925 // Since some of the sign extended bits are demanded, we know that the sign
1926 // bit is demanded.
1927 InDemandedBits.setBit(InBits - 1);
1928
1929 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1930 Depth + 1))
1931 return true;
1932 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1933 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1934
1935 // If the sign bit is known one, the top bits match.
1936 Known = Known.sext(BitWidth);
1937
1938 // If the sign bit is known zero, convert this to a zero extend.
1939 if (Known.isNonNegative()) {
1940 unsigned Opc =
1941 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1942 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1943 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1944 }
1945
1946 // Attempt to avoid multi-use ops if we don't need anything from them.
1947 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1948 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1949 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1950 break;
1951 }
1952 case ISD::ANY_EXTEND:
1953 case ISD::ANY_EXTEND_VECTOR_INREG: {
1954 SDValue Src = Op.getOperand(0);
1955 EVT SrcVT = Src.getValueType();
1956 unsigned InBits = SrcVT.getScalarSizeInBits();
1957 unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1958 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1959
1960 // If we only need the bottom element then we can just bitcast.
1961 // TODO: Handle ANY_EXTEND?
1962 if (IsVecInReg && DemandedElts == 1 &&
1963 VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1964 TLO.DAG.getDataLayout().isLittleEndian())
1965 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1966
1967 APInt InDemandedBits = DemandedBits.trunc(InBits);
1968 APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1969 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1970 Depth + 1))
1971 return true;
1972 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1973 assert(Known.getBitWidth() == InBits && "Src width has changed?");
1974 Known = Known.anyext(BitWidth);
1975
1976 // Attempt to avoid multi-use ops if we don't need anything from them.
1977 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1978 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
1979 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
1980 break;
1981 }
1982 case ISD::TRUNCATE: {
1983 SDValue Src = Op.getOperand(0);
1984
1985 // Simplify the input, using demanded bit information, and compute the known
1986 // zero/one bits live out.
1987 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1988 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1989 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
1990 Depth + 1))
1991 return true;
1992 Known = Known.trunc(BitWidth);
1993
1994 // Attempt to avoid multi-use ops if we don't need anything from them.
1995 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1996 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1997 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1998
1999 // If the input is only used by this truncate, see if we can shrink it based
2000 // on the known demanded bits.
2001 if (Src.getNode()->hasOneUse()) {
2002 switch (Src.getOpcode()) {
2003 default:
2004 break;
2005 case ISD::SRL:
2006 // Shrink SRL by a constant if none of the high bits shifted in are
2007 // demanded.
2008 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2009 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2010 // undesirable.
2011 break;
2012
2013 const APInt *ShAmtC =
2014 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2015 if (!ShAmtC || ShAmtC->uge(BitWidth))
2016 break;
2017 uint64_t ShVal = ShAmtC->getZExtValue();
2018
2019 APInt HighBits =
2020 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2021 HighBits.lshrInPlace(ShVal);
2022 HighBits = HighBits.trunc(BitWidth);
2023
2024 if (!(HighBits & DemandedBits)) {
2025 // None of the shifted in bits are needed. Add a truncate of the
2026 // shift input, then shift it.
2027 SDValue NewShAmt = TLO.DAG.getConstant(
2028 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2029 SDValue NewTrunc =
2030 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2031 return TLO.CombineTo(
2032 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2033 }
2034 break;
2035 }
2036 }
2037
2038 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2039 break;
2040 }
2041 case ISD::AssertZext: {
2042 // AssertZext demands all of the high bits, plus any of the low bits
2043 // demanded by its users.
2044 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2045 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2046 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2047 TLO, Depth + 1))
2048 return true;
2049 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2050
2051 Known.Zero |= ~InMask;
2052 break;
2053 }
2054 case ISD::EXTRACT_VECTOR_ELT: {
2055 SDValue Src = Op.getOperand(0);
2056 SDValue Idx = Op.getOperand(1);
2057 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2058 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2059
2060 if (SrcEltCnt.isScalable())
2061 return false;
2062
2063 // Demand the bits from every vector element without a constant index.
2064 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2065 APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
2066 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2067 if (CIdx->getAPIntValue().ult(NumSrcElts))
2068 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2069
2070 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2071 // anything about the extended bits.
2072 APInt DemandedSrcBits = DemandedBits;
2073 if (BitWidth > EltBitWidth)
2074 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2075
2076 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2077 Depth + 1))
2078 return true;
2079
2080 // Attempt to avoid multi-use ops if we don't need anything from them.
2081 if (!DemandedSrcBits.isAllOnesValue() ||
2082 !DemandedSrcElts.isAllOnesValue()) {
2083 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2084 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2085 SDValue NewOp =
2086 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2087 return TLO.CombineTo(Op, NewOp);
2088 }
2089 }
2090
2091 Known = Known2;
2092 if (BitWidth > EltBitWidth)
2093 Known = Known.anyext(BitWidth);
2094 break;
2095 }
2096 case ISD::BITCAST: {
2097 SDValue Src = Op.getOperand(0);
2098 EVT SrcVT = Src.getValueType();
2099 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2100
2101 // If this is an FP->Int bitcast and if the sign bit is the only
2102 // thing demanded, turn this into a FGETSIGN.
2103 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2104 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2105 SrcVT.isFloatingPoint()) {
2106 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2107 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2108 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2109 SrcVT != MVT::f128) {
2110 // Cannot eliminate/lower SHL for f128 yet.
2111 EVT Ty = OpVTLegal ? VT : MVT::i32;
2112 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2113 // place. We expect the SHL to be eliminated by other optimizations.
2114 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2115 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2116 if (!OpVTLegal && OpVTSizeInBits > 32)
2117 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2118 unsigned ShVal = Op.getValueSizeInBits() - 1;
2119 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2120 return TLO.CombineTo(Op,
2121 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2122 }
2123 }
2124
2125 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2126 // Demand the elt/bit if any of the original elts/bits are demanded.
2127 // TODO - bigendian once we have test coverage.
2128 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
2129 TLO.DAG.getDataLayout().isLittleEndian()) {
2130 unsigned Scale = BitWidth / NumSrcEltBits;
2131 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2132 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2133 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2134 for (unsigned i = 0; i != Scale; ++i) {
2135 unsigned Offset = i * NumSrcEltBits;
2136 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
2137 if (!Sub.isNullValue()) {
2138 DemandedSrcBits |= Sub;
2139 for (unsigned j = 0; j != NumElts; ++j)
2140 if (DemandedElts[j])
2141 DemandedSrcElts.setBit((j * Scale) + i);
2142 }
2143 }
2144
2145 APInt KnownSrcUndef, KnownSrcZero;
2146 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2147 KnownSrcZero, TLO, Depth + 1))
2148 return true;
2149
2150 KnownBits KnownSrcBits;
2151 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2152 KnownSrcBits, TLO, Depth + 1))
2153 return true;
2154 } else if ((NumSrcEltBits % BitWidth) == 0 &&
2155 TLO.DAG.getDataLayout().isLittleEndian()) {
2156 unsigned Scale = NumSrcEltBits / BitWidth;
2157 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2158 APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
2159 APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
2160 for (unsigned i = 0; i != NumElts; ++i)
2161 if (DemandedElts[i]) {
2162 unsigned Offset = (i % Scale) * BitWidth;
2163 DemandedSrcBits.insertBits(DemandedBits, Offset);
2164 DemandedSrcElts.setBit(i / Scale);
2165 }
2166
2167 if (SrcVT.isVector()) {
2168 APInt KnownSrcUndef, KnownSrcZero;
2169 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2170 KnownSrcZero, TLO, Depth + 1))
2171 return true;
2172 }
2173
2174 KnownBits KnownSrcBits;
2175 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2176 KnownSrcBits, TLO, Depth + 1))
2177 return true;
2178 }
2179
2180 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2181 // recursive call where Known may be useful to the caller.
2182 if (Depth > 0) {
2183 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2184 return false;
2185 }
2186 break;
2187 }
2188 case ISD::ADD:
2189 case ISD::MUL:
2190 case ISD::SUB: {
2191 // Add, Sub, and Mul don't demand any bits in positions beyond that
2192 // of the highest bit demanded of them.
2193 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2194 SDNodeFlags Flags = Op.getNode()->getFlags();
2195 unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2196 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2197 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2198 Depth + 1) ||
2199 SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2200 Depth + 1) ||
2201 // See if the operation should be performed at a smaller bit width.
2202 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2203 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2204 // Disable the nsw and nuw flags. We can no longer guarantee that we
2205 // won't wrap after simplification.
2206 Flags.setNoSignedWrap(false);
2207 Flags.setNoUnsignedWrap(false);
2208 SDValue NewOp =
2209 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2210 return TLO.CombineTo(Op, NewOp);
2211 }
2212 return true;
2213 }
2214
2215 // Attempt to avoid multi-use ops if we don't need anything from them.
2216 if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
2217 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2218 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2219 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2220 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2221 if (DemandedOp0 || DemandedOp1) {
2222 Flags.setNoSignedWrap(false);
2223 Flags.setNoUnsignedWrap(false);
2224 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2225 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2226 SDValue NewOp =
2227 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2228 return TLO.CombineTo(Op, NewOp);
2229 }
2230 }
2231
2232 // If we have a constant operand, we may be able to turn it into -1 if we
2233 // do not demand the high bits. This can make the constant smaller to
2234 // encode, allow more general folding, or match specialized instruction
2235 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2236 // is probably not useful (and could be detrimental).
2237 ConstantSDNode *C = isConstOrConstSplat(Op1);
2238 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2239 if (C && !C->isAllOnesValue() && !C->isOne() &&
2240 (C->getAPIntValue() | HighMask).isAllOnesValue()) {
2241 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2242 // Disable the nsw and nuw flags. We can no longer guarantee that we
2243 // won't wrap after simplification.
2244 Flags.setNoSignedWrap(false);
2245 Flags.setNoUnsignedWrap(false);
2246 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2247 return TLO.CombineTo(Op, NewOp);
2248 }
2249
2250 LLVM_FALLTHROUGH;
2251 }
2252 default:
2253 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2254 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2255 Known, TLO, Depth))
2256 return true;
2257 break;
2258 }
2259
2260 // Just use computeKnownBits to compute output bits.
2261 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2262 break;
2263 }
2264
2265 // If we know the value of all of the demanded bits, return this as a
2266 // constant.
2267 if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2268 // Avoid folding to a constant if any OpaqueConstant is involved.
2269 const SDNode *N = Op.getNode();
2270 for (SDNodeIterator I = SDNodeIterator::begin(N),
2271 E = SDNodeIterator::end(N);
2272 I != E; ++I) {
2273 SDNode *Op = *I;
2274 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2275 if (C->isOpaque())
2276 return false;
2277 }
2278 if (VT.isInteger())
2279 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2280 if (VT.isFloatingPoint())
2281 return TLO.CombineTo(
2282 Op,
2283 TLO.DAG.getConstantFP(
2284 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2285 }
2286
2287 return false;
2288}
2289
2290bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2291 const APInt &DemandedElts,
2292 APInt &KnownUndef,
2293 APInt &KnownZero,
2294 DAGCombinerInfo &DCI) const {
2295 SelectionDAG &DAG = DCI.DAG;
2296 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2297 !DCI.isBeforeLegalizeOps());
2298
2299 bool Simplified =
2300 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2301 if (Simplified) {
2302 DCI.AddToWorklist(Op.getNode());
2303 DCI.CommitTargetLoweringOpt(TLO);
2304 }
2305
2306 return Simplified;
2307}
2308
2309/// Given a vector binary operation and known undefined elements for each input
2310/// operand, compute whether each element of the output is undefined.
2311static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2312 const APInt &UndefOp0,
2313 const APInt &UndefOp1) {
2314 EVT VT = BO.getValueType();
2315 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2316 "Vector binop only");
2317
2318 EVT EltVT = VT.getVectorElementType();
2319 unsigned NumElts = VT.getVectorNumElements();
2320 assert(UndefOp0.getBitWidth() == NumElts &&
2321 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2322
2323 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2324 const APInt &UndefVals) {
2325 if (UndefVals[Index])
2326 return DAG.getUNDEF(EltVT);
2327
2328 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2329 // Try hard to make sure that the getNode() call is not creating temporary
2330 // nodes. Ignore opaque integers because they do not constant fold.
2331 SDValue Elt = BV->getOperand(Index);
2332 auto *C = dyn_cast<ConstantSDNode>(Elt);
2333 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2334 return Elt;
2335 }
2336
2337 return SDValue();
2338 };
2339
2340 APInt KnownUndef = APInt::getNullValue(NumElts);
2341 for (unsigned i = 0; i != NumElts; ++i) {
2342 // If both inputs for this element are either constant or undef and match
2343 // the element type, compute the constant/undef result for this element of
2344 // the vector.
2345 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2346 // not handle FP constants. The code within getNode() should be refactored
2347 // to avoid the danger of creating a bogus temporary node here.
2348 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2349 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2350 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2351 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2352 KnownUndef.setBit(i);
2353 }
2354 return KnownUndef;
2355}
2356
2357bool TargetLowering::SimplifyDemandedVectorElts(
2358 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2359 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2360 bool AssumeSingleUse) const {
2361 EVT VT = Op.getValueType();
2362 unsigned Opcode = Op.getOpcode();
2363 APInt DemandedElts = OriginalDemandedElts;
2364 unsigned NumElts = DemandedElts.getBitWidth();
2365 assert(VT.isVector() && "Expected vector op");
2366
2367 KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2368
2369 // TODO: For now we assume we know nothing about scalable vectors.
2370 if (VT.isScalableVector())
2371 return false;
2372
2373 assert(VT.getVectorNumElements() == NumElts &&
2374 "Mask size mismatches value type element count!");
2375
2376 // Undef operand.
2377 if (Op.isUndef()) {
2378 KnownUndef.setAllBits();
2379 return false;
2380 }
2381
2382 // If Op has other users, assume that all elements are needed.
2383 if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2384 DemandedElts.setAllBits();
2385
2386 // Not demanding any elements from Op.
2387 if (DemandedElts == 0) {
2388 KnownUndef.setAllBits();
2389 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2390 }
2391
2392 // Limit search depth.
2393 if (Depth >= SelectionDAG::MaxRecursionDepth)
2394 return false;
2395
2396 SDLoc DL(Op);
2397 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2398
2399 // Helper for demanding the specified elements and all the bits of both binary
2400 // operands.
2401 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2402 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2403 TLO.DAG, Depth + 1);
2404 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2405 TLO.DAG, Depth + 1);
2406 if (NewOp0 || NewOp1) {
2407 SDValue NewOp = TLO.DAG.getNode(
2408 Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2409 return TLO.CombineTo(Op, NewOp);
2410 }
2411 return false;
2412 };
2413
2414 switch (Opcode) {
2415 case ISD::SCALAR_TO_VECTOR: {
2416 if (!DemandedElts[0]) {
2417 KnownUndef.setAllBits();
2418 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2419 }
2420 KnownUndef.setHighBits(NumElts - 1);
2421 break;
2422 }
2423 case ISD::BITCAST: {
2424 SDValue Src = Op.getOperand(0);
2425 EVT SrcVT = Src.getValueType();
2426
2427 // We only handle vectors here.
2428 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2429 if (!SrcVT.isVector())
2430 break;
2431
2432 // Fast handling of 'identity' bitcasts.
2433 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2434 if (NumSrcElts == NumElts)
2435 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2436 KnownZero, TLO, Depth + 1);
2437
2438 APInt SrcZero, SrcUndef;
2439 APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2440
2441 // Bitcast from 'large element' src vector to 'small element' vector, we
2442 // must demand a source element if any DemandedElt maps to it.
2443 if ((NumElts % NumSrcElts) == 0) {
2444 unsigned Scale = NumElts / NumSrcElts;
2445 for (unsigned i = 0; i != NumElts; ++i)
2446 if (DemandedElts[i])
2447 SrcDemandedElts.setBit(i / Scale);
2448
2449 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2450 TLO, Depth + 1))
2451 return true;
2452
2453 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2454 // of the large element.
2455 // TODO - bigendian once we have test coverage.
2456 if (TLO.DAG.getDataLayout().isLittleEndian()) {
2457 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2458 APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2459 for (unsigned i = 0; i != NumElts; ++i)
2460 if (DemandedElts[i]) {
2461 unsigned Ofs = (i % Scale) * EltSizeInBits;
2462 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2463 }
2464
2465 KnownBits Known;
2466 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2467 TLO, Depth + 1))
2468 return true;
2469 }
2470
2471 // If the src element is zero/undef then all the output elements will be -
2472 // only demanded elements are guaranteed to be correct.
2473 for (unsigned i = 0; i != NumSrcElts; ++i) {
2474 if (SrcDemandedElts[i]) {
2475 if (SrcZero[i])
2476 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2477 if (SrcUndef[i])
2478 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2479 }
2480 }
2481 }
2482
2483 // Bitcast from 'small element' src vector to 'large element' vector, we
2484 // demand all smaller source elements covered by the larger demanded element
2485 // of this vector.
2486 if ((NumSrcElts % NumElts) == 0) {
2487 unsigned Scale = NumSrcElts / NumElts;
2488 for (unsigned i = 0; i != NumElts; ++i)
2489 if (DemandedElts[i])
2490 SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2491
2492 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2493 TLO, Depth + 1))
2494 return true;
2495
2496 // If all the src elements covering an output element are zero/undef, then
2497 // the output element will be as well, assuming it was demanded.
2498 for (unsigned i = 0; i != NumElts; ++i) {
2499 if (DemandedElts[i]) {
2500 if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2501 KnownZero.setBit(i);
2502 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2503 KnownUndef.setBit(i);
2504 }
2505 }
2506 }
2507 break;
2508 }
2509 case ISD::BUILD_VECTOR: {
2510 // Check all elements and simplify any unused elements with UNDEF.
2511 if (!DemandedElts.isAllOnesValue()) {
2512 // Don't simplify BROADCASTS.
2513 if (llvm::any_of(Op->op_values(),
2514 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2515 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2516 bool Updated = false;
2517 for (unsigned i = 0; i != NumElts; ++i) {
2518 if (!DemandedElts[i] && !Ops[i].isUndef()) {
2519 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2520 KnownUndef.setBit(i);
2521 Updated = true;
2522 }
2523 }
2524 if (Updated)
2525 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2526 }
2527 }
2528 for (unsigned i = 0; i != NumElts; ++i) {
2529 SDValue SrcOp = Op.getOperand(i);
2530 if (SrcOp.isUndef()) {
2531 KnownUndef.setBit(i);
2532 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2533 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2534 KnownZero.setBit(i);
2535 }
2536 }
2537 break;
2538 }
2539 case ISD::CONCAT_VECTORS: {
2540 EVT SubVT = Op.getOperand(0).getValueType();
2541 unsigned NumSubVecs = Op.getNumOperands();
2542 unsigned NumSubElts = SubVT.getVectorNumElements();
2543 for (unsigned i = 0; i != NumSubVecs; ++i) {
2544 SDValue SubOp = Op.getOperand(i);
2545 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2546 APInt SubUndef, SubZero;
2547 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2548 Depth + 1))
2549 return true;
2550 KnownUndef.insertBits(SubUndef, i * NumSubElts);
2551 KnownZero.insertBits(SubZero, i * NumSubElts);
2552 }
2553 break;
2554 }
2555 case ISD::INSERT_SUBVECTOR: {
2556 // Demand any elements from the subvector and the remainder from the src its
2557 // inserted into.
2558 SDValue Src = Op.getOperand(0);
2559 SDValue Sub = Op.getOperand(1);
2560 uint64_t Idx = Op.getConstantOperandVal(2);
2561 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
2562 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
2563 APInt DemandedSrcElts = DemandedElts;
2564 DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
2565
2566 APInt SubUndef, SubZero;
2567 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
2568 Depth + 1))
2569 return true;
2570
2571 // If none of the src operand elements are demanded, replace it with undef.
2572 if (!DemandedSrcElts && !Src.isUndef())
2573 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2574 TLO.DAG.getUNDEF(VT), Sub,
2575 Op.getOperand(2)));
2576
2577 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
2578 TLO, Depth + 1))
2579 return true;
2580 KnownUndef.insertBits(SubUndef, Idx);
2581 KnownZero.insertBits(SubZero, Idx);
2582
2583 // Attempt to avoid multi-use ops if we don't need anything from them.
2584 if (!DemandedSrcElts.isAllOnesValue() ||
2585 !DemandedSubElts.isAllOnesValue()) {
2586 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2587 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2588 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
2589 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
2590 if (NewSrc || NewSub) {
2591 NewSrc = NewSrc ? NewSrc : Src;
2592 NewSub = NewSub ? NewSub : Sub;
2593 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2594 NewSub, Op.getOperand(2));
2595 return TLO.CombineTo(Op, NewOp);
2596 }
2597 }
2598 break;
2599 }
2600 case ISD::EXTRACT_SUBVECTOR: {
2601 // Offset the demanded elts by the subvector index.
2602 SDValue Src = Op.getOperand(0);
2603 if (Src.getValueType().isScalableVector())
2604 break;
2605 uint64_t Idx = Op.getConstantOperandVal(1);
2606 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2607 APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2608
2609 APInt SrcUndef, SrcZero;
2610 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2611 Depth + 1))
2612 return true;
2613 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2614 KnownZero = SrcZero.extractBits(NumElts, Idx);
2615
2616 // Attempt to avoid multi-use ops if we don't need anything from them.
2617 if (!DemandedElts.isAllOnesValue()) {
2618 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2619 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2620 if (NewSrc) {
2621 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2622 Op.getOperand(1));
2623 return TLO.CombineTo(Op, NewOp);
2624 }
2625 }
2626 break;
2627 }
2628 case ISD::INSERT_VECTOR_ELT: {
2629 SDValue Vec = Op.getOperand(0);
2630 SDValue Scl = Op.getOperand(1);
2631 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2632
2633 // For a legal, constant insertion index, if we don't need this insertion
2634 // then strip it, else remove it from the demanded elts.
2635 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2636 unsigned Idx = CIdx->getZExtValue();
2637 if (!DemandedElts[Idx])
2638 return TLO.CombineTo(Op, Vec);
2639
2640 APInt DemandedVecElts(DemandedElts);
2641 DemandedVecElts.clearBit(Idx);
2642 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2643 KnownZero, TLO, Depth + 1))
2644 return true;
2645
2646 KnownUndef.setBitVal(Idx, Scl.isUndef());
2647
2648 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
2649 break;
2650 }
2651
2652 APInt VecUndef, VecZero;
2653 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2654 Depth + 1))
2655 return true;
2656 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2657 break;
2658 }
2659 case ISD::VSELECT: {
2660 // Try to transform the select condition based on the current demanded
2661 // elements.
2662 // TODO: If a condition element is undef, we can choose from one arm of the
2663 // select (and if one arm is undef, then we can propagate that to the
2664 // result).
2665 // TODO - add support for constant vselect masks (see IR version of this).
2666 APInt UnusedUndef, UnusedZero;
2667 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2668 UnusedZero, TLO, Depth + 1))
2669 return true;
2670
2671 // See if we can simplify either vselect operand.
2672 APInt DemandedLHS(DemandedElts);
2673 APInt DemandedRHS(DemandedElts);
2674 APInt UndefLHS, ZeroLHS;
2675 APInt UndefRHS, ZeroRHS;
2676 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2677 ZeroLHS, TLO, Depth + 1))
2678 return true;
2679 if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2680 ZeroRHS, TLO, Depth + 1))
2681 return true;
2682
2683 KnownUndef = UndefLHS & UndefRHS;
2684 KnownZero = ZeroLHS & ZeroRHS;
2685 break;
2686 }
2687 case ISD::VECTOR_SHUFFLE: {
2688 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2689
2690 // Collect demanded elements from shuffle operands..
2691 APInt DemandedLHS(NumElts, 0);
2692 APInt DemandedRHS(NumElts, 0);
2693 for (unsigned i = 0; i != NumElts; ++i) {
2694 int M = ShuffleMask[i];
2695 if (M < 0 || !DemandedElts[i])
2696 continue;
2697 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2698 if (M < (int)NumElts)
2699 DemandedLHS.setBit(M);
2700 else
2701 DemandedRHS.setBit(M - NumElts);
2702 }
2703
2704 // See if we can simplify either shuffle operand.
2705 APInt UndefLHS, ZeroLHS;
2706 APInt UndefRHS, ZeroRHS;
2707 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2708 ZeroLHS, TLO, Depth + 1))
2709 return true;
2710 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2711 ZeroRHS, TLO, Depth + 1))
2712 return true;
2713
2714 // Simplify mask using undef elements from LHS/RHS.
2715 bool Updated = false;
2716 bool IdentityLHS = true, IdentityRHS = true;
2717 SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2718 for (unsigned i = 0; i != NumElts; ++i) {
2719 int &M = NewMask[i];
2720 if (M < 0)
2721 continue;
2722 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2723 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2724 Updated = true;
2725 M = -1;
2726 }
2727 IdentityLHS &= (M < 0) || (M == (int)i);
2728 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2729 }
2730
2731 // Update legal shuffle masks based on demanded elements if it won't reduce
2732 // to Identity which can cause premature removal of the shuffle mask.
2733 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2734 SDValue LegalShuffle =
2735 buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2736 NewMask, TLO.DAG);
2737 if (LegalShuffle)
2738 return TLO.CombineTo(Op, LegalShuffle);
2739 }
2740
2741 // Propagate undef/zero elements from LHS/RHS.
2742 for (unsigned i = 0; i != NumElts; ++i) {
2743 int M = ShuffleMask[i];
2744 if (M < 0) {
2745 KnownUndef.setBit(i);
2746 } else if (M < (int)NumElts) {
2747 if (UndefLHS[M])
2748 KnownUndef.setBit(i);
2749 if (ZeroLHS[M])
2750 KnownZero.setBit(i);
2751 } else {
2752 if (UndefRHS[M - NumElts])
2753 KnownUndef.setBit(i);
2754 if (ZeroRHS[M - NumElts])
2755 KnownZero.setBit(i);
2756 }
2757 }
2758 break;
2759 }
2760 case ISD::ANY_EXTEND_VECTOR_INREG:
2761 case ISD::SIGN_EXTEND_VECTOR_INREG:
2762 case ISD::ZERO_EXTEND_VECTOR_INREG: {
2763 APInt SrcUndef, SrcZero;
2764 SDValue Src = Op.getOperand(0);
2765 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2766 APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2767 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2768 Depth + 1))
2769 return true;
2770 KnownZero = SrcZero.zextOrTrunc(NumElts);
2771 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2772
2773 if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2774 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2775 DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2776 // aext - if we just need the bottom element then we can bitcast.
2777 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2778 }
2779
2780 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2781 // zext(undef) upper bits are guaranteed to be zero.
2782 if (DemandedElts.isSubsetOf(KnownUndef))
2783 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2784 KnownUndef.clearAllBits();
2785 }
2786 break;
2787 }
2788
2789 // TODO: There are more binop opcodes that could be handled here - MIN,
2790 // MAX, saturated math, etc.
2791 case ISD::OR:
2792 case ISD::XOR:
2793 case ISD::ADD:
2794 case ISD::SUB:
2795 case ISD::FADD:
2796 case ISD::FSUB:
2797 case ISD::FMUL:
2798 case ISD::FDIV:
2799 case ISD::FREM: {
2800 SDValue Op0 = Op.getOperand(0);
2801 SDValue Op1 = Op.getOperand(1);
2802
2803 APInt UndefRHS, ZeroRHS;
2804 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
2805 Depth + 1))
2806 return true;
2807 APInt UndefLHS, ZeroLHS;
2808 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
2809 Depth + 1))
2810 return true;
2811
2812 KnownZero = ZeroLHS & ZeroRHS;
2813 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2814
2815 // Attempt to avoid multi-use ops if we don't need anything from them.
2816 // TODO - use KnownUndef to relax the demandedelts?
2817 if (!DemandedElts.isAllOnesValue())
2818 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2819 return true;
2820 break;
2821 }
2822 case ISD::SHL:
2823 case ISD::SRL:
2824 case ISD::SRA:
2825 case ISD::ROTL:
2826 case ISD::ROTR: {
2827 SDValue Op0 = Op.getOperand(0);
2828 SDValue Op1 = Op.getOperand(1);
2829
2830 APInt UndefRHS, ZeroRHS;
2831 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
2832 Depth + 1))
2833 return true;
2834 APInt UndefLHS, ZeroLHS;
2835 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
2836 Depth + 1))
2837 return true;
2838
2839 KnownZero = ZeroLHS;
2840 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2841
2842 // Attempt to avoid multi-use ops if we don't need anything from them.
2843 // TODO - use KnownUndef to relax the demandedelts?
2844 if (!DemandedElts.isAllOnesValue())
2845 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2846 return true;
2847 break;
2848 }
2849 case ISD::MUL:
2850 case ISD::AND: {
2851 SDValue Op0 = Op.getOperand(0);
2852 SDValue Op1 = Op.getOperand(1);
2853
2854 APInt SrcUndef, SrcZero;
2855 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
2856 Depth + 1))
2857 return true;
2858 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
2859 TLO, Depth + 1))
2860 return true;
2861
2862 // If either side has a zero element, then the result element is zero, even
2863 // if the other is an UNDEF.
2864 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2865 // and then handle 'and' nodes with the rest of the binop opcodes.
2866 KnownZero |= SrcZero;
2867 KnownUndef &= SrcUndef;
2868 KnownUndef &= ~KnownZero;
2869
2870 // Attempt to avoid multi-use ops if we don't need anything from them.
2871 // TODO - use KnownUndef to relax the demandedelts?
2872 if (!DemandedElts.isAllOnesValue())
2873 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
2874 return true;
2875 break;
2876 }
2877 case ISD::TRUNCATE:
2878 case ISD::SIGN_EXTEND:
2879 case ISD::ZERO_EXTEND:
2880 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2881 KnownZero, TLO, Depth + 1))
2882 return true;
2883
2884 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2885 // zext(undef) upper bits are guaranteed to be zero.
2886 if (DemandedElts.isSubsetOf(KnownUndef))
2887 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2888 KnownUndef.clearAllBits();
2889 }
2890 break;
2891 default: {
2892 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2893 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2894 KnownZero, TLO, Depth))
2895 return true;
2896 } else {
2897 KnownBits Known;
2898 APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2899 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2900 TLO, Depth, AssumeSingleUse))
2901 return true;
2902 }
2903 break;
2904 }
2905 }
2906 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2907
2908 // Constant fold all undef cases.
2909 // TODO: Handle zero cases as well.
2910 if (DemandedElts.isSubsetOf(KnownUndef))
2911 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2912
2913 return false;
2914}
2915
2916/// Determine which of the bits specified in Mask are known to be either zero or
2917/// one and return them in the Known.
2918void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2919 KnownBits &Known,
2920 const APInt &DemandedElts,
2921 const SelectionDAG &DAG,
2922 unsigned Depth) const {
2923 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2924 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2925 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2926 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2927 "Should use MaskedValueIsZero if you don't know whether Op"
2928 " is a target node!");
2929 Known.resetAll();
2930}
2931
2932void TargetLowering::computeKnownBitsForTargetInstr(
2933 GISelKnownBits &Analysis, Register R, KnownBits &Known,
2934 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
2935 unsigned Depth) const {
2936 Known.resetAll();
2937}
2938
2939void TargetLowering::computeKnownBitsForFrameIndex(
2940 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
2941 // The low bits are known zero if the pointer is aligned.
2942 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
2943}
2944
2945Align TargetLowering::computeKnownAlignForTargetInstr(
2946 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
2947 unsigned Depth) const {
2948 return Align(1);
2949}
2950
2951/// This method can be implemented by targets that want to expose additional
2952/// information about sign bits to the DAG Combiner.
2953unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2954 const APInt &,
2955 const SelectionDAG &,
2956 unsigned Depth) const {
2957 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2958 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2959 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2960 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2961 "Should use ComputeNumSignBits if you don't know whether Op"
2962 " is a target node!");
2963 return 1;
2964}
2965
2966unsigned TargetLowering::computeNumSignBitsForTargetInstr(
2967 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
2968 const MachineRegisterInfo &MRI, unsigned Depth) const {
2969 return 1;
2970}
2971
2972bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2973 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2974 TargetLoweringOpt &TLO, unsigned Depth) const {
2975 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2976 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2977 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2978 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2979 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2980 " is a target node!");
2981 return false;
2982}
2983
2984bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2985 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2986 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2987 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2988 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2989 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2990 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2991 "Should use SimplifyDemandedBits if you don't know whether Op"
2992 " is a target node!");
2993 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2994 return false;
2995}
2996
2997SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2998 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2999 SelectionDAG &DAG, unsigned Depth) const {
3000 assert(
3001 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3002 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3003 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3004 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3005 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3006 " is a target node!");
3007 return SDValue();
3008}
3009
3010SDValue
3011TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3012 SDValue N1, MutableArrayRef<int> Mask,
3013 SelectionDAG &DAG) const {
3014 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3015 if (!LegalMask) {
3016 std::swap(N0, N1);
3017 ShuffleVectorSDNode::commuteMask(Mask);
3018 LegalMask = isShuffleMaskLegal(Mask, VT);
3019 }
3020
3021 if (!LegalMask)
3022 return SDValue();
3023
3024 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3025}
3026
3027const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3028 return nullptr;
3029}
3030
3031bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3032 const SelectionDAG &DAG,
3033 bool SNaN,
3034 unsigned Depth) const {
3035 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3036 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3037 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3038 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3039 "Should use isKnownNeverNaN if you don't know whether Op"
3040 " is a target node!");
3041 return false;
3042}
3043
3044// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3045// work with truncating build vectors and vectors with elements of less than
3046// 8 bits.
3047bool TargetLowering::isConstTrueVal(const SDNode *N) const {
3048 if (!N)
3049 return false;
3050
3051 APInt CVal;
3052 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3053 CVal = CN->getAPIntValue();
3054 } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
3055 auto *CN = BV->getConstantSplatNode();
3056 if (!CN)
3057 return false;
3058
3059 // If this is a truncating build vector, truncate the splat value.
3060 // Otherwise, we may fail to match the expected values below.
3061 unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
3062 CVal = CN->getAPIntValue();
3063 if (BVEltWidth < CVal.getBitWidth())
3064 CVal = CVal.trunc(BVEltWidth);
3065 } else {
3066 return false;
3067 }
3068
3069 switch (getBooleanContents(N->getValueType(0))) {
3070 case UndefinedBooleanContent:
3071 return CVal[0];
3072 case ZeroOrOneBooleanContent:
3073 return CVal.isOneValue();
3074 case ZeroOrNegativeOneBooleanContent:
3075 return CVal.isAllOnesValue();
3076 }
3077
3078 llvm_unreachable("Invalid boolean contents");
3079}
3080
3081bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3082 if (!N)
3083 return false;
3084
3085 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3086 if (!CN) {
3087 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3088 if (!BV)
3089 return false;
3090
3091 // Only interested in constant splats, we don't care about undef
3092 // elements in identifying boolean constants and getConstantSplatNode
3093 // returns NULL if all ops are undef;
3094 CN = BV->getConstantSplatNode();
3095 if (!CN)
3096 return false;
3097 }
3098
3099 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3100 return !CN->getAPIntValue()[0];
3101
3102 return CN->isNullValue();
3103}
3104
3105bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3106 bool SExt) const {
3107 if (VT == MVT::i1)
3108 return N->isOne();
3109
3110 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3111 switch (Cnt) {
3112 case TargetLowering::ZeroOrOneBooleanContent:
3113 // An extended value of 1 is always true, unless its original type is i1,
3114 // in which case it will be sign extended to -1.
3115 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3116 case TargetLowering::UndefinedBooleanContent:
3117 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3118 return N->isAllOnesValue() && SExt;
3119 }
3120 llvm_unreachable("Unexpected enumeration.");
3121}
3122
3123/// This helper function of SimplifySetCC tries to optimize the comparison when
3124/// either operand of the SetCC node is a bitwise-and instruction.
3125SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3126 ISD::CondCode Cond, const SDLoc &DL,
3127 DAGCombinerInfo &DCI) const {
3128 // Match these patterns in any of their permutations:
3129 // (X & Y) == Y
3130 // (X & Y) != Y
3131 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3132 std::swap(N0, N1);
3133
3134 EVT OpVT = N0.getValueType();
3135 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3136 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3137 return SDValue();
3138
3139 SDValue X, Y;
3140 if (N0.getOperand(0) == N1) {
3141 X = N0.getOperand(1);
3142 Y = N0.getOperand(0);
3143 } else if (N0.getOperand(1) == N1) {
3144 X = N0.getOperand(0);
3145 Y = N0.getOperand(1);
3146 } else {
3147 return SDValue();
3148 }
3149
3150 SelectionDAG &DAG = DCI.DAG;
3151 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3152 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3153 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3154 // Note that where Y is variable and is known to have at most one bit set
3155 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3156 // equivalent when Y == 0.
3157 assert(OpVT.isInteger());
3158 Cond = ISD::getSetCCInverse(Cond, OpVT);
3159 if (DCI.isBeforeLegalizeOps() ||
3160 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3161 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3162 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3163 // If the target supports an 'and-not' or 'and-complement' logic operation,
3164 // try to use that to make a comparison operation more efficient.
3165 // But don't do this transform if the mask is a single bit because there are
3166 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3167 // 'rlwinm' on PPC).
3168
3169 // Bail out if the compare operand that we want to turn into a zero is
3170 // already a zero (otherwise, infinite loop).
3171 auto *YConst = dyn_cast<ConstantSDNode>(Y);
3172 if (YConst && YConst->isNullValue())
3173 return SDValue();
3174
3175 // Transform this into: ~X & Y == 0.
3176 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3177 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3178 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3179 }
3180
3181 return SDValue();
3182}
3183
3184/// There are multiple IR patterns that could be checking whether certain
3185/// truncation of a signed number would be lossy or not. The pattern which is
3186/// best at IR level, may not lower optimally. Thus, we want to unfold it.
3187/// We are looking for the following pattern: (KeptBits is a constant)
3188/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3189/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3190/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
3191/// We will unfold it into the natural trunc+sext pattern:
3192/// ((%x << C) a>> C) dstcond %x
3193/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
3194SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3195 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3196 const SDLoc &DL) const {
3197 // We must be comparing with a constant.
3198 ConstantSDNode *C1;
3199 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3200 return SDValue();
3201
3202 // N0 should be: add %x, (1 << (KeptBits-1))
3203 if (N0->getOpcode() != ISD::ADD)
3204 return SDValue();
3205
3206 // And we must be 'add'ing a constant.
3207 ConstantSDNode *C01;
3208 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3209 return SDValue();
3210
3211 SDValue X = N0->getOperand(0);
3212 EVT XVT = X.getValueType();
3213
3214 // Validate constants ...
3215
3216 APInt I1 = C1->getAPIntValue();
3217
3218 ISD::CondCode NewCond;
3219 if (Cond == ISD::CondCode::SETULT) {
3220 NewCond = ISD::CondCode::SETEQ;
3221 } else if (Cond == ISD::CondCode::SETULE) {
3222 NewCond = ISD::CondCode::SETEQ;
3223 // But need to 'canonicalize' the constant.
3224 I1 += 1;
3225 } else if (Cond == ISD::CondCode::SETUGT) {
3226 NewCond = ISD::CondCode::SETNE;
3227 // But need to 'canonicalize' the constant.
3228 I1 += 1;
3229 } else if (Cond == ISD::CondCode::SETUGE) {
3230 NewCond = ISD::CondCode::SETNE;
3231 } else
3232 return SDValue();
3233
3234 APInt I01 = C01->getAPIntValue();
3235
3236 auto checkConstants = [&I1, &I01]() -> bool {
3237 // Both of them must be power-of-two, and the constant from setcc is bigger.
3238 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3239 };
3240
3241 if (checkConstants()) {
3242 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3243 } else {
3244 // What if we invert constants? (and the target predicate)
3245 I1.negate();
3246 I01.negate();
3247 assert(XVT.isInteger());
3248 NewCond = getSetCCInverse(NewCond, XVT);
3249 if (!checkConstants())
3250 return SDValue();
3251 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3252 }
3253
3254 // They are power-of-two, so which bit is set?
3255 const unsigned KeptBits = I1.logBase2();
3256 const unsigned KeptBitsMinusOne = I01.logBase2();
3257
3258 // Magic!
3259 if (KeptBits != (KeptBitsMinusOne + 1))
3260 return SDValue();
3261 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3262
3263 // We don't want to do this in every single case.
3264 SelectionDAG &DAG = DCI.DAG;
3265 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3266 XVT, KeptBits))
3267 return SDValue();
3268
3269 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3270 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3271
3272 // Unfold into: ((%x << C) a>> C) cond %x
3273 // Where 'cond' will be either 'eq' or 'ne'.
3274 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3275 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3276 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3277 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3278
3279 return T2;
3280}
3281
3282// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
3283SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3284 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3285 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3286 assert(isConstOrConstSplat(N1C) &&
3287 isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
3288 "Should be a comparison with 0.");
3289 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3290 "Valid only for [in]equality comparisons.");
3291
3292 unsigned NewShiftOpcode;
3293 SDValue X, C, Y;
3294
3295 SelectionDAG &DAG = DCI.DAG;
3296 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3297
3298 // Look for '(C l>>/<< Y)'.
3299 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3300 // The shift should be one-use.
3301 if (!V.hasOneUse())
3302 return false;
3303 unsigned OldShiftOpcode = V.getOpcode();
3304 switch (OldShiftOpcode) {
3305 case ISD::SHL:
3306 NewShiftOpcode = ISD::SRL;
3307 break;
3308 case ISD::SRL:
3309 NewShiftOpcode = ISD::SHL;
3310 break;
3311 default:
3312 return false; // must be a logical shift.
3313 }
3314 // We should be shifting a constant.
3315 // FIXME: best to use isConstantOrConstantVector().
3316 C = V.getOperand(0);
3317 ConstantSDNode *CC =
3318 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3319 if (!CC)
3320 return false;
3321 Y = V.getOperand(1);
3322
3323 ConstantSDNode *XC =
3324 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3325 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3326 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3327 };
3328
3329 // LHS of comparison should be an one-use 'and'.
3330 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3331 return SDValue();
3332
3333 X = N0.getOperand(0);
3334 SDValue Mask = N0.getOperand(1);
3335
3336 // 'and' is commutative!
3337 if (!Match(Mask)) {
3338 std::swap(X, Mask);
3339 if (!Match(Mask))
3340 return SDValue();
3341 }
3342
3343 EVT VT = X.getValueType();
3344
3345 // Produce:
3346 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3347 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3348 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3349 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3350 return T2;
3351}
3352
3353/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3354/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3355/// handle the commuted versions of these patterns.
3356SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3357 ISD::CondCode Cond, const SDLoc &DL,
3358 DAGCombinerInfo &DCI) const {
3359 unsigned BOpcode = N0.getOpcode();
3360 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3361 "Unexpected binop");
3362 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3363
3364 // (X + Y) == X --> Y == 0
3365 // (X - Y) == X --> Y == 0
3366 // (X ^ Y) == X --> Y == 0
3367 SelectionDAG &DAG = DCI.DAG;
3368 EVT OpVT = N0.getValueType();
3369 SDValue X = N0.getOperand(0);
3370 SDValue Y = N0.getOperand(1);
3371 if (X == N1)
3372 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3373
3374 if (Y != N1)
3375 return SDValue();
3376
3377 // (X + Y) == Y --> X == 0
3378 // (X ^ Y) == Y --> X == 0
3379 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3380 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3381
3382 // The shift would not be valid if the operands are boolean (i1).
3383 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3384 return SDValue();
3385
3386 // (X - Y) == Y --> X == Y << 1
3387 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3388 !DCI.isBeforeLegalize());
3389 SDValue One = DAG.getConstant(1, DL, ShiftVT);
3390 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3391 if (!DCI.isCalledByLegalizer())
3392 DCI.AddToWorklist(YShl1.getNode());
3393 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3394}
3395
3396static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
3397 SDValue N0, const APInt &C1,
3398 ISD::CondCode Cond, const SDLoc &dl,
3399 SelectionDAG &DAG) {
3400 // Look through truncs that don't change the value of a ctpop.
3401 // FIXME: Add vector support? Need to be careful with setcc result type below.
3402 SDValue CTPOP = N0;
3403 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
3404 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
3405 CTPOP = N0.getOperand(0);
3406
3407 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
3408 return SDValue();
3409
3410 EVT CTVT = CTPOP.getValueType();
3411 SDValue CTOp = CTPOP.getOperand(0);
3412
3413 // If this is a vector CTPOP, keep the CTPOP if it is legal.
3414 // TODO: Should we check if CTPOP is legal(or custom) for scalars?
3415 if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
3416 return SDValue();
3417
3418 // (ctpop x) u< 2 -> (x & x-1) == 0
3419 // (ctpop x) u> 1 -> (x & x-1) != 0
3420 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
3421 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
3422 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
3423 return SDValue();
3424 if (C1 == 0 && (Cond == ISD::SETULT))
3425 return SDValue(); // This is handled elsewhere.
3426
3427 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
3428
3429 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3430 SDValue Result = CTOp;
3431 for (unsigned i = 0; i < Passes; i++) {
3432 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
3433 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
3434 }
3435 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3436 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
3437 }
3438
3439 // If ctpop is not supported, expand a power-of-2 comparison based on it.
3440 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
3441 // For scalars, keep CTPOP if it is legal or custom.
3442 if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
3443 return SDValue();
3444 // This is based on X86's custom lowering for CTPOP which produces more
3445 // instructions than the expansion here.
3446
3447 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3448 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3449 SDValue Zero = DAG.getConstant(0, dl, CTVT);
3450 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3451 assert(CTVT.isInteger());
3452 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
3453 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3454 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3455 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3456 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3457 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3458 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3459 }
3460
3461 return SDValue();
3462}
3463
3464/// Try to simplify a setcc built with the specified operands and cc. If it is
3465/// unable to simplify it, return a null SDValue.
3466SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3467 ISD::CondCode Cond, bool foldBooleans,
3468 DAGCombinerInfo &DCI,
3469 const SDLoc &dl) const {
3470 SelectionDAG &DAG = DCI.DAG;
3471 const DataLayout &Layout = DAG.getDataLayout();
3472 EVT OpVT = N0.getValueType();
3473
3474 // Constant fold or commute setcc.
3475 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3476 return Fold;
3477
3478 // Ensure that the constant occurs on the RHS and fold constant comparisons.
3479 // TODO: Handle non-splat vector constants. All undef causes trouble.
3480 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
3481 // infinite loop here when we encounter one.
3482 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3483 if (isConstOrConstSplat(N0) &&
3484 (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
3485 (DCI.isBeforeLegalizeOps() ||
3486 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3487 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3488
3489 // If we have a subtract with the same 2 non-constant operands as this setcc
3490 // -- but in reverse order -- then try to commute the operands of this setcc
3491 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3492 // instruction on some targets.
3493 if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3494 (DCI.isBeforeLegalizeOps() ||
3495 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3496 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
3497 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
3498 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3499
3500 if (auto *N1C = isConstOrConstSplat(N1)) {
3501 const APInt &C1 = N1C->getAPIntValue();
3502
3503 // Optimize some CTPOP cases.
3504 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
3505 return V;
3506
3507 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3508 // equality comparison, then we're just comparing whether X itself is
3509 // zero.
3510 if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3511 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3512 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
3513 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
3514 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3515 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
3516 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3517 // (srl (ctlz x), 5) == 0 -> X != 0
3518 // (srl (ctlz x), 5) != 1 -> X != 0
3519 Cond = ISD::SETNE;
3520 } else {
3521 // (srl (ctlz x), 5) != 0 -> X == 0
3522 // (srl (ctlz x), 5) == 1 -> X == 0
3523 Cond = ISD::SETEQ;
3524 }
3525 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3526 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
3527 Cond);
3528 }
3529 }
3530 }
3531 }
3532
3533 // FIXME: Support vectors.
3534 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3535 const APInt &C1 = N1C->getAPIntValue();
3536
3537 // (zext x) == C --> x == (trunc C)
3538 // (sext x) == C --> x == (trunc C)
3539 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3540 DCI.isBeforeLegalize() && N0->hasOneUse()) {
3541 unsigned MinBits = N0.getValueSizeInBits();
3542 SDValue PreExt;
3543 bool Signed = false;
3544 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3545 // ZExt
3546 MinBits = N0->getOperand(0).getValueSizeInBits();
3547 PreExt = N0->getOperand(0);
3548 } else if (N0->getOpcode() == ISD::AND) {
3549 // DAGCombine turns costly ZExts into ANDs
3550 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3551 if ((C->getAPIntValue()+1).isPowerOf2()) {
3552 MinBits = C->getAPIntValue().countTrailingOnes();
3553 PreExt = N0->getOperand(0);
3554 }
3555 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3556 // SExt
3557 MinBits = N0->getOperand(0).getValueSizeInBits();
3558 PreExt = N0->getOperand(0);
3559 Signed = true;
3560 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3561 // ZEXTLOAD / SEXTLOAD
3562 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3563 MinBits = LN0->getMemoryVT().getSizeInBits();
3564 PreExt = N0;
3565 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3566 Signed = true;
3567 MinBits = LN0->getMemoryVT().getSizeInBits();
3568 PreExt = N0;
3569 }
3570 }
3571
3572 // Figure out how many bits we need to preserve this constant.
3573 unsigned ReqdBits = Signed ?
3574 C1.getBitWidth() - C1.getNumSignBits() + 1 :
3575 C1.getActiveBits();
3576
3577 // Make sure we're not losing bits from the constant.
3578 if (MinBits > 0 &&
3579 MinBits < C1.getBitWidth() &&
3580 MinBits >= ReqdBits) {
3581 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3582 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3583 // Will get folded away.
3584 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3585 if (MinBits == 1 && C1 == 1)
3586 // Invert the condition.
3587 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3588 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3589 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3590 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3591 }
3592
3593 // If truncating the setcc operands is not desirable, we can still
3594 // simplify the expression in some cases:
3595 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3596 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3597 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3598 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3599 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3600 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3601 SDValue TopSetCC = N0->getOperand(0);
3602 unsigned N0Opc = N0->getOpcode();
3603 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3604 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3605 TopSetCC.getOpcode() == ISD::SETCC &&
3606 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3607 (isConstFalseVal(N1C) ||
3608 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3609
3610 bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3611 (!N1C->isNullValue() && Cond == ISD::SETNE);
3612
3613 if (!Inverse)
3614 return TopSetCC;
3615
3616 ISD::CondCode InvCond = ISD::getSetCCInverse(
3617 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3618 TopSetCC.getOperand(0).getValueType());
3619 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3620 TopSetCC.getOperand(1),
3621 InvCond);
3622 }
3623 }
3624 }
3625
3626 // If the LHS is '(and load, const)', the RHS is 0, the test is for
3627 // equality or unsigned, and all 1 bits of the const are in the same
3628 // partial word, see if we can shorten the load.
3629 if (DCI.isBeforeLegalize() &&
3630 !ISD::isSignedIntSetCC(Cond) &&
3631 N0.getOpcode() == ISD::AND && C1 == 0 &&
3632 N0.getNode()->hasOneUse() &&
3633 isa<LoadSDNode>(N0.getOperand(0)) &&
3634 N0.getOperand(0).getNode()->hasOneUse() &&
3635 isa<ConstantSDNode>(N0.getOperand(1))) {
3636 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3637 APInt bestMask;
3638 unsigned bestWidth = 0, bestOffset = 0;
3639 if (Lod->isSimple() && Lod->isUnindexed()) {
3640 unsigned origWidth = N0.getValueSizeInBits();
3641 unsigned maskWidth = origWidth;
3642 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3643 // 8 bits, but have to be careful...
3644 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3645 origWidth = Lod->getMemoryVT().getSizeInBits();
3646 const APInt &Mask = N0.getConstantOperandAPInt(1);
3647 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3648 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3649 for (unsigned offset=0; offset<origWidth/width; offset++) {
3650 if (Mask.isSubsetOf(newMask)) {
3651 if (Layout.isLittleEndian())
3652 bestOffset = (uint64_t)offset * (width/8);
3653 else
3654 bestOffset = (origWidth/width - offset - 1) * (width/8);
3655 bestMask = Mask.lshr(offset * (width/8) * 8);
3656 bestWidth = width;
3657 break;
3658 }
3659 newMask <<= width;
3660 }
3661 }
3662 }
3663 if (bestWidth) {
3664 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3665 if (newVT.isRound() &&
3666 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3667 SDValue Ptr = Lod->getBasePtr();
3668 if (bestOffset != 0)
3669 Ptr =
3670 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
3671 SDValue NewLoad =
3672 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
3673 Lod->getPointerInfo().getWithOffset(bestOffset),
3674 Lod->getOriginalAlign());
3675 return DAG.getSetCC(dl, VT,
3676 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3677 DAG.getConstant(bestMask.trunc(bestWidth),
3678 dl, newVT)),
3679 DAG.getConstant(0LL, dl, newVT), Cond);
3680 }
3681 }
3682 }
3683
3684 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3685 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3686 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3687
3688 // If the comparison constant has bits in the upper part, the
3689 // zero-extended value could never match.
3690 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3691 C1.getBitWidth() - InSize))) {
3692 switch (Cond) {
3693 case ISD::SETUGT:
3694 case ISD::SETUGE:
3695 case ISD::SETEQ:
3696 return DAG.getConstant(0, dl, VT);
3697 case ISD::SETULT:
3698 case ISD::SETULE:
3699 case ISD::SETNE:
3700 return DAG.getConstant(1, dl, VT);
3701 case ISD::SETGT:
3702 case ISD::SETGE:
3703 // True if the sign bit of C1 is set.
3704 return DAG.getConstant(C1.isNegative(), dl, VT);
3705 case ISD::SETLT:
3706 case ISD::SETLE:
3707 // True if the sign bit of C1 isn't set.
3708 return DAG.getConstant(C1.isNonNegative(), dl, VT);
3709 default:
3710 break;
3711 }
3712 }
3713
3714 // Otherwise, we can perform the comparison with the low bits.
3715 switch (Cond) {
3716 case ISD::SETEQ:
3717 case ISD::SETNE:
3718 case ISD::SETUGT:
3719 case ISD::SETUGE:
3720 case ISD::SETULT:
3721 case ISD::SETULE: {
3722 EVT newVT = N0.getOperand(0).getValueType();
3723 if (DCI.isBeforeLegalizeOps() ||
3724 (isOperationLegal(ISD::SETCC, newVT) &&
3725 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3726 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
3727 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3728
3729 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3730 NewConst, Cond);
3731 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3732 }
3733 break;
3734 }
3735 default:
3736 break; // todo, be more careful with signed comparisons
3737 }
3738 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3739 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3740 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
3741 OpVT)) {
3742 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3743 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3744 EVT ExtDstTy = N0.getValueType();
3745 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3746
3747 // If the constant doesn't fit into the number of bits for the source of
3748 // the sign extension, it is impossible for both sides to be equal.
3749 if (C1.getMinSignedBits() > ExtSrcTyBits)
3750 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
3751
3752 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
3753 ExtDstTy != ExtSrcTy && "Unexpected types!");
3754 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3755 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
3756 DAG.getConstant(Imm, dl, ExtDstTy));
3757 if (!DCI.isCalledByLegalizer())
3758 DCI.AddToWorklist(ZextOp.getNode());
3759 // Otherwise, make this a use of a zext.
3760 return DAG.getSetCC(dl, VT, ZextOp,
3761 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
3762 } else if ((N1C->isNullValue() || N1C->isOne()) &&
3763 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3764 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
3765 if (N0.getOpcode() == ISD::SETCC &&
3766 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
3767 (N0.getValueType() == MVT::i1 ||
3768 getBooleanContents(N0.getOperand(0).getValueType()) ==
3769 ZeroOrOneBooleanContent)) {
3770 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3771 if (TrueWhenTrue)
3772 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3773 // Invert the condition.
3774 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3775 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
3776 if (DCI.isBeforeLegalizeOps() ||
3777 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3778 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3779 }
3780
3781 if ((N0.getOpcode() == ISD::XOR ||
3782 (N0.getOpcode() == ISD::AND &&
3783 N0.getOperand(0).getOpcode() == ISD::XOR &&
3784 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3785 isOneConstant(N0.getOperand(1))) {
3786 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
3787 // can only do this if the top bits are known zero.
3788 unsigned BitWidth = N0.getValueSizeInBits();
3789 if (DAG.MaskedValueIsZero(N0,
3790 APInt::getHighBitsSet(BitWidth,
3791 BitWidth-1))) {
3792 // Okay, get the un-inverted input value.
3793 SDValue Val;
3794 if (N0.getOpcode() == ISD::XOR) {
3795 Val = N0.getOperand(0);
3796 } else {
3797 assert(N0.getOpcode() == ISD::AND &&
3798 N0.getOperand(0).getOpcode() == ISD::XOR);
3799 // ((X^1)&1)^1 -> X & 1
3800 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3801 N0.getOperand(0).getOperand(0),
3802 N0.getOperand(1));
3803 }
3804
3805 return DAG.getSetCC(dl, VT, Val, N1,
3806 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3807 }
3808 } else if (N1C->isOne()) {
3809 SDValue Op0 = N0;
3810 if (Op0.getOpcode() == ISD::TRUNCATE)
3811 Op0 = Op0.getOperand(0);
3812
3813 if ((Op0.getOpcode() == ISD::XOR) &&
3814 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3815 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3816 SDValue XorLHS = Op0.getOperand(0);
3817 SDValue XorRHS = Op0.getOperand(1);
3818 // Ensure that the input setccs return an i1 type or 0/1 value.
3819 if (Op0.getValueType() == MVT::i1 ||
3820 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
3821 ZeroOrOneBooleanContent &&
3822 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
3823 ZeroOrOneBooleanContent)) {
3824 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3825 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3826 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
3827 }
3828 }
3829 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
3830 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3831 if (Op0.getValueType().bitsGT(VT))
3832 Op0 = DAG.getNode(ISD::AND, dl, VT,
3833 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3834 DAG.getConstant(1, dl, VT));
3835 else if (Op0.getValueType().bitsLT(VT))
3836 Op0 = DAG.getNode(ISD::AND, dl, VT,
3837 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3838 DAG.getConstant(1, dl, VT));
3839
3840 return DAG.getSetCC(dl, VT, Op0,
3841 DAG.getConstant(0, dl, Op0.getValueType()),
3842 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3843 }
3844 if (Op0.getOpcode() == ISD::AssertZext &&
3845 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3846 return DAG.getSetCC(dl, VT, Op0,
3847 DAG.getConstant(0, dl, Op0.getValueType()),
3848 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3849 }
3850 }
3851
3852 // Given:
3853 // icmp eq/ne (urem %x, %y), 0
3854 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3855 // icmp eq/ne %x, 0
3856 if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3857 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3858 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3859 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3860 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3861 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3862 }
3863
3864 if (SDValue V =
3865 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3866 return V;
3867 }
3868
3869 // These simplifications apply to splat vectors as well.
3870 // TODO: Handle more splat vector cases.
3871 if (auto *N1C = isConstOrConstSplat(N1)) {
3872 const APInt &C1 = N1C->getAPIntValue();
3873
3874 APInt MinVal, MaxVal;
3875 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3876 if (ISD::isSignedIntSetCC(Cond)) {
3877 MinVal = APInt::getSignedMinValue(OperandBitSize);
3878 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3879 } else {
3880 MinVal = APInt::getMinValue(OperandBitSize);
3881 MaxVal = APInt::getMaxValue(OperandBitSize);
3882 }
3883
3884 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3885 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3886 // X >= MIN --> true
3887 if (C1 == MinVal)
3888 return DAG.getBoolConstant(true, dl, VT, OpVT);
3889
3890 if (!VT.isVector()) { // TODO: Support this for vectors.
3891 // X >= C0 --> X > (C0 - 1)
3892 APInt C = C1 - 1;
3893 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3894 if ((DCI.isBeforeLegalizeOps() ||
3895 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3896 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3897 isLegalICmpImmediate(C.getSExtValue())))) {
3898 return DAG.getSetCC(dl, VT, N0,
3899 DAG.getConstant(C, dl, N1.getValueType()),
3900 NewCC);
3901 }
3902 }
3903 }
3904
3905 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3906 // X <= MAX --> true
3907 if (C1 == MaxVal)
3908 return DAG.getBoolConstant(true, dl, VT, OpVT);
3909
3910 // X <= C0 --> X < (C0 + 1)
3911 if (!VT.isVector()) { // TODO: Support this for vectors.
3912 APInt C = C1 + 1;
3913 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3914 if ((DCI.isBeforeLegalizeOps() ||
3915 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3916 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3917 isLegalICmpImmediate(C.getSExtValue())))) {
3918 return DAG.getSetCC(dl, VT, N0,
3919 DAG.getConstant(C, dl, N1.getValueType()),
3920 NewCC);
3921 }
3922 }
3923 }
3924
3925 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3926 if (C1 == MinVal)
3927 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3928
3929 // TODO: Support this for vectors after legalize ops.
3930 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3931 // Canonicalize setlt X, Max --> setne X, Max
3932 if (C1 == MaxVal)
3933 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3934
3935 // If we have setult X, 1, turn it into seteq X, 0
3936 if (C1 == MinVal+1)
3937 return DAG.getSetCC(dl, VT, N0,
3938 DAG.getConstant(MinVal, dl, N0.getValueType()),
3939 ISD::SETEQ);
3940 }
3941 }
3942
3943 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3944 if (C1 == MaxVal)
3945 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3946
3947 // TODO: Support this for vectors after legalize ops.
3948 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3949 // Canonicalize setgt X, Min --> setne X, Min
3950 if (C1 == MinVal)
3951 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3952
3953 // If we have setugt X, Max-1, turn it into seteq X, Max
3954 if (C1 == MaxVal-1)
3955 return DAG.getSetCC(dl, VT, N0,
3956 DAG.getConstant(MaxVal, dl, N0.getValueType()),
3957 ISD::SETEQ);
3958 }
3959 }
3960
3961 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3962 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
3963 if (C1.isNullValue())
3964 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3965 VT, N0, N1, Cond, DCI, dl))
3966 return CC;
3967
3968 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
3969 // For example, when high 32-bits of i64 X are known clear:
3970 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
3971 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
3972 bool CmpZero = N1C->getAPIntValue().isNullValue();
3973 bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
3974 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
3975 // Match or(lo,shl(hi,bw/2)) pattern.
3976 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
3977 unsigned EltBits = V.getScalarValueSizeInBits();
3978 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
3979 return false;
3980 SDValue LHS = V.getOperand(0);
3981 SDValue RHS = V.getOperand(1);
3982 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
3983 // Unshifted element must have zero upperbits.
3984 if (RHS.getOpcode() == ISD::SHL &&
3985 isa<ConstantSDNode>(RHS.getOperand(1)) &&
3986 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
3987 DAG.MaskedValueIsZero(LHS, HiBits)) {
3988 Lo = LHS;
3989 Hi = RHS.getOperand(0);
3990 return true;
3991 }
3992 if (LHS.getOpcode() == ISD::SHL &&
3993 isa<ConstantSDNode>(LHS.getOperand(1)) &&
3994 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
3995 DAG.MaskedValueIsZero(RHS, HiBits)) {
3996 Lo = RHS;
3997 Hi = LHS.getOperand(0);
3998 return true;
3999 }
4000 return false;
4001 };
4002
4003 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4004 unsigned EltBits = N0.getScalarValueSizeInBits();
4005 unsigned HalfBits = EltBits / 2;
4006 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4007 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4008 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4009 SDValue NewN0 =
4010 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4011 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4012 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4013 };
4014
4015 SDValue Lo, Hi;
4016 if (IsConcat(N0, Lo, Hi))
4017 return MergeConcat(Lo, Hi);
4018
4019 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4020 SDValue Lo0, Lo1, Hi0, Hi1;
4021 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4022 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4023 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4024 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4025 }
4026 }
4027 }
4028 }
4029
4030 // If we have "setcc X, C0", check to see if we can shrink the immediate
4031 // by changing cc.
4032 // TODO: Support this for vectors after legalize ops.
4033 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4034 // SETUGT X, SINTMAX -> SETLT X, 0
4035 // SETUGE X, SINTMIN -> SETLT X, 0
4036 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
4037 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
4038 return DAG.getSetCC(dl, VT, N0,
4039 DAG.getConstant(0, dl, N1.getValueType()),
4040 ISD::SETLT);
4041
4042 // SETULT X, SINTMIN -> SETGT X, -1
4043 // SETULE X, SINTMAX -> SETGT X, -1
4044 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
4045 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
4046 return DAG.getSetCC(dl, VT, N0,
4047 DAG.getAllOnesConstant(dl, N1.getValueType()),
4048 ISD::SETGT);
4049 }
4050 }
4051
4052 // Back to non-vector simplifications.
4053 // TODO: Can we do these for vector splats?
4054 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4055 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4056 const APInt &C1 = N1C->getAPIntValue();
4057 EVT ShValTy = N0.getValueType();
4058
4059 // Fold bit comparisons when we can. This will result in an
4060 // incorrect value when boolean false is negative one, unless
4061 // the bitsize is 1 in which case the false value is the same
4062 // in practice regardless of the representation.
4063 if ((VT.getSizeInBits() == 1 ||
4064 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4065 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4066 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
4067 N0.getOpcode() == ISD::AND) {
4068 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4069 EVT ShiftTy =
4070 getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4071 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
4072 // Perform the xform if the AND RHS is a single bit.
4073 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
4074 if (AndRHS->getAPIntValue().isPowerOf2() &&
4075 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4076 return DAG.getNode(ISD::TRUNCATE, dl, VT,
4077 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4078 DAG.getConstant(ShCt, dl, ShiftTy)));
4079 }
4080 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
4081 // (X & 8) == 8 --> (X & 8) >> 3
4082 // Perform the xform if C1 is a single bit.
4083 unsigned ShCt = C1.logBase2();
4084 if (C1.isPowerOf2() &&
4085 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4086 return DAG.getNode(ISD::TRUNCATE, dl, VT,
4087 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4088 DAG.getConstant(ShCt, dl, ShiftTy)));
4089 }
4090 }
4091 }
4092 }
4093
4094 if (C1.getMinSignedBits() <= 64 &&
4095 !isLegalICmpImmediate(C1.getSExtValue())) {
4096 EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4097 // (X & -256) == 256 -> (X >> 8) == 1
4098 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4099 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4100 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4101 const APInt &AndRHSC = AndRHS->getAPIntValue();
4102 if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
4103 unsigned ShiftBits = AndRHSC.countTrailingZeros();
4104 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4105 SDValue Shift =
4106 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
4107 DAG.getConstant(ShiftBits, dl, ShiftTy));
4108 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
4109 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
4110 }
4111 }
4112 }
4113 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
4114 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
4115 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
4116 // X < 0x100000000 -> (X >> 32) < 1
4117 // X >= 0x100000000 -> (X >> 32) >= 1
4118 // X <= 0x0ffffffff -> (X >> 32) < 1
4119 // X > 0x0ffffffff -> (X >> 32) >= 1
4120 unsigned ShiftBits;
4121 APInt NewC = C1;
4122 ISD::CondCode NewCond = Cond;
4123 if (AdjOne) {
4124 ShiftBits = C1.countTrailingOnes();
4125 NewC = NewC + 1;
4126 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4127 } else {
4128 ShiftBits = C1.countTrailingZeros();
4129 }
4130 NewC.lshrInPlace(ShiftBits);
4131 if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
4132 isLegalICmpImmediate(NewC.getSExtValue()) &&
4133 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4134 SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4135 DAG.getConstant(ShiftBits, dl, ShiftTy));
4136 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
4137 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
4138 }
4139 }
4140 }
4141 }
4142
4143 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
4144 auto *CFP = cast<ConstantFPSDNode>(N1);
4145 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4146
4147 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
4148 // constant if knowing that the operand is non-nan is enough. We prefer to
4149 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
4150 // materialize 0.0.
4151 if (Cond == ISD::SETO || Cond == ISD::SETUO)
4152 return DAG.getSetCC(dl, VT, N0, N0, Cond);
4153
4154 // setcc (fneg x), C -> setcc swap(pred) x, -C
4155 if (N0.getOpcode() == ISD::FNEG) {
4156 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
4157 if (DCI.isBeforeLegalizeOps() ||
4158 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
4159 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
4160 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
4161 }
4162 }
4163
4164 // If the condition is not legal, see if we can find an equivalent one
4165 // which is legal.
4166 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4167 // If the comparison was an awkward floating-point == or != and one of
4168 // the comparison operands is infinity or negative infinity, convert the
4169 // condition to a less-awkward <= or >=.
4170 if (CFP->getValueAPF().isInfinity()) {
4171 bool IsNegInf = CFP->getValueAPF().isNegative();
4172 ISD::CondCode NewCond = ISD::SETCC_INVALID;
4173 switch (Cond) {
4174 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4175 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4176 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4177 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4178 default: break;
4179 }
4180 if (NewCond != ISD::SETCC_INVALID &&
4181 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4182 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4183 }
4184 }
4185 }
4186
4187 if (N0 == N1) {
4188 // The sext(setcc()) => setcc() optimization relies on the appropriate
4189 // constant being emitted.
4190 assert(!N0.getValueType().isInteger() &&
4191 "Integer types should be handled by FoldSetCC");
4192
4193 bool EqTrue = ISD::isTrueWhenEqual(Cond);
4194 unsigned UOF = ISD::getUnorderedFlavor(Cond);
4195 if (UOF == 2) // FP operators that are undefined on NaNs.
4196 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4197 if (UOF == unsigned(EqTrue))
4198 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4199 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
4200 // if it is not already.
4201 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4202 if (NewCond != Cond &&
4203 (DCI.isBeforeLegalizeOps() ||
4204 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4205 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4206 }
4207
4208 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4209 N0.getValueType().isInteger()) {
4210 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4211 N0.getOpcode() == ISD::XOR) {
4212 // Simplify (X+Y) == (X+Z) --> Y == Z
4213 if (N0.getOpcode() == N1.getOpcode()) {
4214 if (N0.getOperand(0) == N1.getOperand(0))
4215 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4216 if (N0.getOperand(1) == N1.getOperand(1))
4217 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4218 if (isCommutativeBinOp(N0.getOpcode())) {
4219 // If X op Y == Y op X, try other combinations.
4220 if (N0.getOperand(0) == N1.getOperand(1))
4221 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4222 Cond);
4223 if (N0.getOperand(1) == N1.getOperand(0))
4224 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4225 Cond);
4226 }
4227 }
4228
4229 // If RHS is a legal immediate value for a compare instruction, we need
4230 // to be careful about increasing register pressure needlessly.
4231 bool LegalRHSImm = false;
4232
4233 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4234 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4235 // Turn (X+C1) == C2 --> X == C2-C1
4236 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
4237 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4238 DAG.getConstant(RHSC->getAPIntValue()-
4239 LHSR->getAPIntValue(),
4240 dl, N0.getValueType()), Cond);
4241 }
4242
4243 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
4244 if (N0.getOpcode() == ISD::XOR)
4245 // If we know that all of the inverted bits are zero, don't bother
4246 // performing the inversion.
4247 if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
4248 return
4249 DAG.getSetCC(dl, VT, N0.getOperand(0),
4250 DAG.getConstant(LHSR->getAPIntValue() ^
4251 RHSC->getAPIntValue(),
4252 dl, N0.getValueType()),
4253 Cond);
4254 }
4255
4256 // Turn (C1-X) == C2 --> X == C1-C2
4257 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
4258 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
4259 return
4260 DAG.getSetCC(dl, VT, N0.getOperand(1),
4261 DAG.getConstant(SUBC->getAPIntValue() -
4262 RHSC->getAPIntValue(),
4263 dl, N0.getValueType()),
4264 Cond);
4265 }
4266 }
4267
4268 // Could RHSC fold directly into a compare?
4269 if (RHSC->getValueType(0).getSizeInBits() <= 64)
4270 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4271 }
4272
4273 // (X+Y) == X --> Y == 0 and similar folds.
4274 // Don't do this if X is an immediate that can fold into a cmp
4275 // instruction and X+Y has other uses. It could be an induction variable
4276 // chain, and the transform would increase register pressure.
4277 if (!LegalRHSImm || N0.hasOneUse())
4278 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4279 return V;
4280 }
4281
4282 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4283 N1.getOpcode() == ISD::XOR)
4284 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4285 return V;
4286
4287 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4288 return V;
4289 }
4290
4291 // Fold remainder of division by a constant.
4292 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4293 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4294 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4295
4296 // When division is cheap or optimizing for minimum size,
4297 // fall through to DIVREM creation by skipping this fold.
4298 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
4299 if (N0.getOpcode() == ISD::UREM) {
4300 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4301 return Folded;
4302 } else if (N0.getOpcode() == ISD::SREM) {
4303 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4304 return Folded;
4305 }
4306 }
4307 }
4308
4309 // Fold away ALL boolean setcc's.
4310 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4311 SDValue Temp;
4312 switch (Cond) {
4313 default: llvm_unreachable("Unknown integer setcc!");
4314 case ISD::SETEQ: // X == Y -> ~(X^Y)
4315 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4316 N0 = DAG.getNOT(dl, Temp, OpVT);
4317 if (!DCI.isCalledByLegalizer())
4318 DCI.AddToWorklist(Temp.getNode());
4319 break;
4320 case ISD::SETNE: // X != Y --> (X^Y)
4321 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4322 break;
4323 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
4324 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
4325 Temp = DAG.getNOT(dl, N0, OpVT);
4326 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4327 if (!DCI.isCalledByLegalizer())
4328 DCI.AddToWorklist(Temp.getNode());
4329 break;
4330 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
4331 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
4332 Temp = DAG.getNOT(dl, N1, OpVT);
4333 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4334 if (!DCI.isCalledByLegalizer())
4335 DCI.AddToWorklist(Temp.getNode());
4336 break;
4337 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
4338 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
4339 Temp = DAG.getNOT(dl, N0, OpVT);
4340 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4341 if (!DCI.isCalledByLegalizer())
4342 DCI.AddToWorklist(Temp.getNode());
4343 break;
4344 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
4345 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
4346 Temp = DAG.getNOT(dl, N1, OpVT);
4347 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4348 break;
4349 }
4350 if (VT.getScalarType() != MVT::i1) {
4351 if (!DCI.isCalledByLegalizer())
4352 DCI.AddToWorklist(N0.getNode());
4353 // FIXME: If running after legalize, we probably can't do this.
4354 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4355 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4356 }
4357 return N0;
4358 }
4359
4360 // Could not fold it.
4361 return SDValue();
4362}
4363
4364/// Returns true (and the GlobalValue and the offset) if the node is a
4365/// GlobalAddress + offset.
4366bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4367 int64_t &Offset) const {
4368
4369 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4370
4371 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4372 GA = GASD->getGlobal();
4373 Offset += GASD->getOffset();
4374 return true;
4375 }
4376
4377 if (N->getOpcode() == ISD::ADD) {
4378 SDValue N1 = N->getOperand(0);
4379 SDValue N2 = N->getOperand(1);
4380 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4381 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4382 Offset += V->getSExtValue();
4383 return true;
4384 }
4385 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4386 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4387 Offset += V->getSExtValue();
4388 return true;
4389 }
4390 }
4391 }
4392
4393 return false;
4394}
4395
4396SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4397 DAGCombinerInfo &DCI) const {
4398 // Default implementation: no optimization.
4399 return SDValue();
4400}
4401
4402//===----------------------------------------------------------------------===//
4403// Inline Assembler Implementation Methods
4404//===----------------------------------------------------------------------===//
4405
4406TargetLowering::ConstraintType
4407TargetLowering::getConstraintType(StringRef Constraint) const {
4408 unsigned S = Constraint.size();
4409
4410 if (S == 1) {
4411 switch (Constraint[0]) {
4412 default: break;
4413 case 'r':
4414 return C_RegisterClass;
4415 case 'm': // memory
4416 case 'o': // offsetable
4417 case 'V': // not offsetable
4418 return C_Memory;
4419 case 'n': // Simple Integer
4420 case 'E': // Floating Point Constant
4421 case 'F': // Floating Point Constant
4422 return C_Immediate;
4423 case 'i': // Simple Integer or Relocatable Constant
4424 case 's': // Relocatable Constant
4425 case 'p': // Address.
4426 case 'X': // Allow ANY value.
4427 case 'I': // Target registers.
4428 case 'J':
4429 case 'K':
4430 case 'L':
4431 case 'M':
4432 case 'N':
4433 case 'O':
4434 case 'P':
4435 case '<':
4436 case '>':
4437 return C_Other;
4438 }
4439 }
4440
4441 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4442 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4443 return C_Memory;
4444 return C_Register;
4445 }
4446 return C_Unknown;
4447}
4448
4449/// Try to replace an X constraint, which matches anything, with another that
4450/// has more specific requirements based on the type of the corresponding
4451/// operand.
4452const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4453 if (ConstraintVT.isInteger())
4454 return "r";
4455 if (ConstraintVT.isFloatingPoint())
4456 return "f"; // works for many targets
4457 return nullptr;
4458}
4459
4460SDValue TargetLowering::LowerAsmOutputForConstraint(
4461 SDValue &Chain, SDValue &Flag, const SDLoc &DL,
4462 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
4463 return SDValue();
4464}
4465
4466/// Lower the specified operand into the Ops vector.
4467/// If it is invalid, don't add anything to Ops.
4468void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4469 std::string &Constraint,
4470 std::vector<SDValue> &Ops,
4471 SelectionDAG &DAG) const {
4472
4473 if (Constraint.length() > 1) return;
4474
4475 char ConstraintLetter = Constraint[0];
4476 switch (ConstraintLetter) {
4477 default: break;
4478 case 'X': // Allows any operand; labels (basic block) use this.
4479 if (Op.getOpcode() == ISD::BasicBlock ||
4480 Op.getOpcode() == ISD::TargetBlockAddress) {
4481 Ops.push_back(Op);
4482 return;
4483 }
4484 LLVM_FALLTHROUGH;
4485 case 'i': // Simple Integer or Relocatable Constant
4486 case 'n': // Simple Integer
4487 case 's': { // Relocatable Constant
4488
4489 GlobalAddressSDNode *GA;
4490 ConstantSDNode *C;
4491 BlockAddressSDNode *BA;
4492 uint64_t Offset = 0;
4493
4494 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
4495 // etc., since getelementpointer is variadic. We can't use
4496 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
4497 // while in this case the GA may be furthest from the root node which is
4498 // likely an ISD::ADD.
4499 while (1) {
4500 if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
4501 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
4502 GA->getValueType(0),
4503 Offset + GA->getOffset()));
4504 return;
4505 } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
4506 ConstraintLetter != 's') {
4507 // gcc prints these as sign extended. Sign extend value to 64 bits
4508 // now; without this it would get ZExt'd later in
4509 // ScheduleDAGSDNodes::EmitNode, which is very generic.
4510 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
4511 BooleanContent BCont = getBooleanContents(MVT::i64);
4512 ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
4513 : ISD::SIGN_EXTEND;
4514 int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
4515 : C->getSExtValue();
4516 Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
4517 SDLoc(C), MVT::i64));
4518 return;
4519 } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
4520 ConstraintLetter != 'n') {
4521 Ops.push_back(DAG.getTargetBlockAddress(
4522 BA->getBlockAddress(), BA->getValueType(0),
4523 Offset + BA->getOffset(), BA->getTargetFlags()));
4524 return;
4525 } else {
4526 const unsigned OpCode = Op.getOpcode();
4527 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
4528 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
4529 Op = Op.getOperand(1);
4530 // Subtraction is not commutative.
4531 else if (OpCode == ISD::ADD &&
4532 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
4533 Op = Op.getOperand(0);
4534 else
4535 return;
4536 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
4537 continue;
4538 }
4539 }
4540 return;
4541 }
4542 break;
4543 }
4544 }
4545}
4546
4547std::pair<unsigned, const TargetRegisterClass *>
4548TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4549 StringRef Constraint,
4550 MVT VT) const {
4551 if (Constraint.empty() || Constraint[0] != '{')
4552 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4553 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4554
4555 // Remove the braces from around the name.
4556 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4557
4558 std::pair<unsigned, const TargetRegisterClass *> R =
4559 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4560
4561 // Figure out which register class contains this reg.
4562 for (const TargetRegisterClass *RC : RI->regclasses()) {
4563 // If none of the value types for this register class are valid, we
4564 // can't use it. For example, 64-bit reg classes on 32-bit targets.
4565 if (!isLegalRC(*RI, *RC))
4566 continue;
4567
4568 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4569 I != E; ++I) {
4570 if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4571 std::pair<unsigned, const TargetRegisterClass *> S =
4572 std::make_pair(*I, RC);
4573
4574 // If this register class has the requested value type, return it,
4575 // otherwise keep searching and return the first class found
4576 // if no other is found which explicitly has the requested type.
4577 if (RI->isTypeLegalForClass(*RC, VT))
4578 return S;
4579 if (!R.second)
4580 R = S;
4581 }
4582 }
4583 }
4584
4585 return R;
4586}
4587
4588//===----------------------------------------------------------------------===//
4589// Constraint Selection.
4590
4591/// Return true of this is an input operand that is a matching constraint like
4592/// "4".
4593bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4594 assert(!ConstraintCode.empty() && "No known constraint!");
4595 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4596}
4597
4598/// If this is an input matching constraint, this method returns the output
4599/// operand it matches.
4600unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4601 assert(!ConstraintCode.empty() && "No known constraint!");
4602 return atoi(ConstraintCode.c_str());
4603}
4604
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
///
/// The work is done in three passes:
///  1. Build one AsmOperandInfo per parsed constraint, attach the call
///     operand (for inputs and indirect outputs) and compute ConstraintVT.
///  2. If any operand has multiple alternatives, score every alternative and
///     select the best one in each non-clobber operand.
///  3. For outputs tied to an input, verify that the two are compatible and
///     report a fatal error otherwise.
///
/// \param DL    Data layout used to size operand types.
/// \param TRI   Register info used when comparing tied operands.
/// \param Call  The call whose called operand is the InlineAsm value.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    // Default type; refined below when an operand or result type is known.
    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple direct outputs are returned as a struct; pick the field
        // that corresponds to this output.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      // Every input consumes the next call argument.
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Operands backed by a call argument derive ConstraintVT from its type.
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are passed by pointer; use the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        // Other sizes leave ConstraintVT at MVT::Other.
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Pointers are represented as integers of the address space's
        // pointer width.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      // Index 0 is kept if no alternative scores above -1; on equal sums the
      // earliest alternative wins because the comparison below is strict.
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          // Clobbers do not participate in scoring.
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          // A single invalid operand disqualifies the whole alternative.
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing value types are tolerated only when both sides agree on
        // integer-ness and resolve to the same register class.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
4793
4794/// Return an integer indicating how general CT is.
4795static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4796 switch (CT) {
4797 case TargetLowering::C_Immediate:
4798 case TargetLowering::C_Other:
4799 case TargetLowering::C_Unknown:
4800 return 0;
4801 case TargetLowering::C_Register:
4802 return 1;
4803 case TargetLowering::C_RegisterClass:
4804 return 2;
4805 case TargetLowering::C_Memory:
4806 return 3;
4807 }
4808 llvm_unreachable("Invalid constraint type");
4809}
4810
4811/// Examine constraint type and operand type and determine a weight value.
4812/// This object must already have been set up with the operand type
4813/// and the current alternative constraint selected.
4814TargetLowering::ConstraintWeight
4815 TargetLowering::getMultipleConstraintMatchWeight(
4816 AsmOperandInfo &info, int maIndex) const {
4817 InlineAsm::ConstraintCodeVector *rCodes;
4818 if (maIndex >= (int)info.multipleAlternatives.size())
4819 rCodes = &info.Codes;
4820 else
4821 rCodes = &info.multipleAlternatives[maIndex].Codes;
4822 ConstraintWeight BestWeight = CW_Invalid;
4823
4824 // Loop over the options, keeping track of the most general one.
4825 for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4826 ConstraintWeight weight =
4827 getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4828 if (weight > BestWeight)
4829 BestWeight = weight;
4830 }
4831
4832 return BestWeight;
4833}
4834
4835/// Examine constraint type and operand type and determine a weight value.
4836/// This object must already have been set up with the operand type
4837/// and the current alternative constraint selected.
4838TargetLowering::ConstraintWeight
4839 TargetLowering::getSingleConstraintMatchWeight(
4840 AsmOperandInfo &info, const char *constraint) const {
4841 ConstraintWeight weight = CW_Invalid;
4842 Value *CallOperandVal = info.CallOperandVal;
4843 // If we don't have a value, we can't do a match,
4844 // but allow it at the lowest weight.
4845 if (!CallOperandVal)
4846 return CW_Default;
4847 // Look at the constraint type.
4848 switch (*constraint) {
4849 case 'i': // immediate integer.
4850 case 'n': // immediate integer with a known value.
4851 if (isa<ConstantInt>(CallOperandVal))
4852 weight = CW_Constant;
4853 break;
4854 case 's': // non-explicit intregal immediate.
4855 if (isa<GlobalValue>(CallOperandVal))
4856 weight = CW_Constant;
4857 break;
4858 case 'E': // immediate float if host format.
4859 case 'F': // immediate float.
4860 if (isa<ConstantFP>(CallOperandVal))
4861 weight = CW_Constant;
4862 break;
4863 case '<': // memory operand with autodecrement.
4864 case '>': // memory operand with autoincrement.
4865 case 'm': // memory operand.
4866 case 'o': // offsettable memory operand
4867 case 'V': // non-offsettable memory operand
4868 weight = CW_Memory;
4869 break;
4870 case 'r': // general register.
4871 case 'g': // general register, memory operand or immediate integer.
4872 // note: Clang converts "g" to "imr".
4873 if (CallOperandVal->getType()->isIntegerTy())
4874 weight = CW_Register;
4875 break;
4876 case 'X': // any operand.
4877 default:
4878 weight = CW_Default;
4879 break;
4880 }
4881 return weight;
4882}
4883
4884/// If there are multiple different constraints that we could pick for this
4885/// operand (e.g. "imr") try to pick the 'best' one.
4886/// This is somewhat tricky: constraints fall into four classes:
4887/// Other -> immediates and magic values
4888/// Register -> one specific register
4889/// RegisterClass -> a group of regs
4890/// Memory -> memory
4891/// Ideally, we would pick the most specific constraint possible: if we have
4892/// something that fits into a register, we would pick it. The problem here
4893/// is that if we have something that could either be in a register or in
4894/// memory that use of the register could cause selection of *other*
4895/// operands to fail: they might only succeed if we pick memory. Because of
4896/// this the heuristic we use is:
4897///
4898/// 1) If there is an 'other' constraint, and if the operand is valid for
4899/// that constraint, use it. This makes us take advantage of 'i'
4900/// constraints when available.
4901/// 2) Otherwise, pick the most general constraint present. This prefers
4902/// 'm' over 'r', for example.
4903///
4904static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4905 const TargetLowering &TLI,
4906 SDValue Op, SelectionDAG *DAG) {
4907 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4908 unsigned BestIdx = 0;
4909 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4910 int BestGenerality = -1;
4911
4912 // Loop over the options, keeping track of the most general one.
4913 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4914 TargetLowering::ConstraintType CType =
4915 TLI.getConstraintType(OpInfo.Codes[i]);
4916
4917 // Indirect 'other' or 'immediate' constraints are not allowed.
4918 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
4919 CType == TargetLowering::C_Register ||
4920 CType == TargetLowering::C_RegisterClass))
4921 continue;
4922
4923 // If this is an 'other' or 'immediate' constraint, see if the operand is
4924 // valid for it. For example, on X86 we might have an 'rI' constraint. If
4925 // the operand is an integer in the range [0..31] we want to use I (saving a
4926 // load of a register), otherwise we must use 'r'.
4927 if ((CType == TargetLowering::C_Other ||
4928 CType == TargetLowering::C_Immediate) && Op.getNode()) {
4929 assert(OpInfo.Codes[i].size() == 1 &&
4930 "Unhandled multi-letter 'other' constraint");
4931 std::vector<SDValue> ResultOps;
4932 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4933 ResultOps, *DAG);
4934 if (!ResultOps.empty()) {
4935 BestType = CType;
4936 BestIdx = i;
4937 break;
4938 }
4939 }
4940
4941 // Things with matching constraints can only be registers, per gcc
4942 // documentation. This mainly affects "g" constraints.
4943 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4944 continue;
4945
4946 // This constraint letter is more general than the previous one, use it.
4947 int Generality = getConstraintGenerality(CType);
4948 if (Generality > BestGenerality) {
4949 BestType = CType;
4950 BestIdx = i;
4951 BestGenerality = Generality;
4952 }
4953 }
4954
4955 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4956 OpInfo.ConstraintType = BestType;
4957}
4958
4959/// Determines the constraint code and constraint type to use for the specific
4960/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4961void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4962 SDValue Op,
4963 SelectionDAG *DAG) const {
4964 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4965
4966 // Single-letter constraints ('r') are very common.
4967 if (OpInfo.Codes.size() == 1) {
4968 OpInfo.ConstraintCode = OpInfo.Codes[0];
4969 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4970 } else {
4971 ChooseConstraint(OpInfo, *this, Op, DAG);
4972 }
4973
4974 // 'X' matches anything.
4975 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4976 // Labels and constants are handled elsewhere ('X' is the only thing
4977 // that matches labels). For Functions, the type here is the type of
4978 // the result, which is not what we want to look at; leave them alone.
4979 Value *v = OpInfo.CallOperandVal;
4980 if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4981 OpInfo.CallOperandVal = v;
4982 return;
4983 }
4984
4985 if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4986 return;
4987
4988 // Otherwise, try to resolve it to something we know about by looking at
4989 // the actual operand type.
4990 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4991 OpInfo.ConstraintCode = Repl;
4992 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4993 }
4994 }
4995}
4996
4997/// Given an exact SDIV by a constant, create a multiplication
4998/// with the multiplicative inverse of the constant.
4999static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
5000 const SDLoc &dl, SelectionDAG &DAG,
5001 SmallVectorImpl<SDNode *> &Created) {
5002 SDValue Op0 = N->getOperand(0);
5003 SDValue Op1 = N->getOperand(1);
5004 EVT VT = N->getValueType(0);
5005 EVT SVT = VT.getScalarType();
5006 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
5007 EVT ShSVT = ShVT.getScalarType();
5008
5009 bool UseSRA = false;
5010 SmallVector<SDValue, 16> Shifts, Factors;
5011
5012 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
5013 if (C->isNullValue())
5014 return false;
5015 APInt Divisor = C->getAPIntValue();
5016 unsigned Shift = Divisor.countTrailingZeros();
5017 if (Shift) {
5018 Divisor.ashrInPlace(Shift);
5019 UseSRA = true;
5020 }
5021 // Calculate the multiplicative inverse, using Newton's method.
5022 APInt t;
5023 APInt Factor = Divisor;
5024 while ((t = Divisor * Factor) != 1)
5025 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
5026 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
5027 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
5028 return true;
5029 };
5030
5031 // Collect all magic values from the build vector.
5032 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
5033 return SDValue();
5034
5035 SDValue Shift, Factor;
5036 if (VT.isFixedLengthVector()) {
5037 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
5038 Factor = DAG.getBuildVector(VT, dl, Factors);
5039 } else if (VT.isScalableVector()) {
5040 assert(Shifts.size() == 1 && Factors.size() == 1 &&
5041 "Expected matchUnaryPredicate to return one element for scalable "
5042 "vectors");
5043 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
5044 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
5045 } else {
5046 Shift = Shifts[0];
5047 Factor = Factors[0];
5048 }
5049
5050 SDValue Res = Op0;
5051
5052 // Shift the value upfront if it is even, so the LSB is one.
5053 if (UseSRA) {
5054 // TODO: For UDIV use SRL instead of SRA.
5055 SDNodeFlags Flags;
5056 Flags.setExact(true);
5057 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
5058 Created.push_back(Res.getNode());
5059 }
5060
5061 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
5062}
5063
5064SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5065 SelectionDAG &DAG,
5066 SmallVectorImpl<SDNode *> &Created) const {
5067 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5068 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5069 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5070 return SDValue(N, 0); // Lower SDIV as SDIV
5071 return SDValue();
5072}
5073
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \p IsAfterLegalization selects how strict the check for a usable high
/// multiply is: when true, MULHS/SMUL_LOHI must be Legal; otherwise Legal or
/// Custom is accepted. Intermediate nodes built here are appended to
/// \p Created; the final node is only returned. An empty SDValue is returned
/// when the type of \p N is not legal, when the divisor operand is rejected by
/// the per-element predicate (a zero element makes it fail), or when neither
/// MULHS nor SMUL_LOHI is usable for the type.
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element constants, one entry per divisor element visited:
  //   MagicFactors - the magic multiplier,
  //   Factors      - -1/0/+1 multiple of the numerator added after the
  //                  high multiply,
  //   Shifts       - the arithmetic right shift amount,
  //   ShiftMasks   - all-ones to keep, zero to drop, the final sign-bit
  //                  correction.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Invoked for each constant divisor element; returning false aborts the
  // whole transform.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      // The magic multiplier, shift and sign-bit correction are all zeroed,
      // so only the numerator term contributes for this element.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected constants in the shape of VT: a build vector
  // for fixed-length vectors, a splat for scalable vectors, or the single
  // collected value for scalars.
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isFixedLengthVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (VT.isScalableVector()) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Prefer MULHS; otherwise take result #1 of SMUL_LOHI.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  // Factor holds -1, 0 or +1 per element, so this is emitted unconditionally
  // as Q + N0 * Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  // ShiftMask is zero for +1/-1 divisors, which disables this correction for
  // those elements.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5191
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// \p IsAfterLegalization selects how strict the check for a usable high
/// multiply is: when true, MULHU/UMUL_LOHI must be Legal; otherwise Legal or
/// Custom is accepted. Intermediate nodes built here are appended to
/// \p Created; the final select is only returned. An empty SDValue is
/// returned when the type of \p N is not legal, when the divisor operand is
/// rejected by the per-element predicate (a zero element makes it fail), or
/// when no high multiply is available for the magic multiplication.
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // Set when at least one element needs the "NPQ" fixup sequence
  // (sub / shift-by-one / add) after the magic multiply.
  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Invoked for each constant divisor element; returning false aborts the
  // whole transform.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    APInt Divisor = C->getAPIntValue();
    APInt::mu magics = Divisor.magicu();
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.a != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics = Divisor.lshr(PreShift).magicu(PreShift);
      assert(magics.a == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.m;

    // Used as a boolean: does this element need the NPQ fixup?
    unsigned SelNPQ;
    if (magics.a == 0 || Divisor.isOneValue()) {
      assert(magics.s < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.s;
      SelNPQ = false;
    } else {
      // One bit of the shift is performed by the NPQ sequence itself.
      PostShift = magics.s - 1;
      SelNPQ = true;
    }

    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    // NPQ factor: only the top bit set when the fixup applies (so a MULHU by
    // it acts as a logical shift right by one), zero otherwise.
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getNullValue(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  // Materialize the collected constants in the shape of VT. NPQFactor is left
  // unset for scalars; the scalar NPQ path below uses a plain SRL by one.
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (VT.isFixedLengthVector()) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (VT.isScalableVector()) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  // Pre-shift the numerator (emitted unconditionally, even when every
  // pre-shift amount is zero).
  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  // Builds the high half of X * Y via MULHU, or via result #1 of UMUL_LOHI;
  // yields an empty SDValue if neither is usable for VT.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
                            : isOperationLegalOrCustom(ISD::MULHU, VT))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    // NPQ = N0 - Q; then Q = (NPQ >> 1) + Q.
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Where the divisor equals one, select the unmodified numerator instead of
  // the computed quotient.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
5334
5335/// If all values in Values that *don't* match the predicate are same 'splat'
5336/// value, then replace all values with that splat value.
5337/// Else, if AlternativeReplacement was provided, then replace all values that
5338/// do match predicate with AlternativeReplacement value.
5339static void
5340turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5341 std::function<bool(SDValue)> Predicate,
5342 SDValue AlternativeReplacement = SDValue()) {
5343 SDValue Replacement;
5344 // Is there a value for which the Predicate does *NOT* match? What is it?
5345 auto SplatValue = llvm::find_if_not(Values, Predicate);
5346 if (SplatValue != Values.end()) {
5347 // Does Values consist only of SplatValue's and values matching Predicate?
5348 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5349 return Value == *SplatValue || Predicate(Value);
5350 })) // Then we shall replace values matching predicate with SplatValue.
5351 Replacement = *SplatValue;
5352 }
5353 if (!Replacement) {
5354 // Oops, we did not find the "baseline" splat value.
5355 if (!AlternativeReplacement)
5356 return; // Nothing to do.
5357 // Let's replace with provided value then.
5358 Replacement = AlternativeReplacement;
5359 }
5360 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5361}
5362
5363/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5364/// where the divisor is constant and the comparison target is zero,
5365/// return a DAG expression that will generate the same comparison result
5366/// using only multiplications, additions and shifts/rotations.
5367/// Ref: "Hacker's Delight" 10-17.
5368SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5369 SDValue CompTargetNode,
5370 ISD::CondCode Cond,
5371 DAGCombinerInfo &DCI,
5372 const SDLoc &DL) const {
5373 SmallVector<SDNode *, 5> Built;
5374 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5375 DCI, DL, Built)) {
5376 for (SDNode *N : Built)
5377 DCI.AddToWorklist(N);
5378 return Folded;
5379 }
5380
5381 return SDValue();
5382}
5383
5384SDValue
5385TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
5386 SDValue CompTargetNode, ISD::CondCode Cond,
5387 DAGCombinerInfo &DCI, const SDLoc &DL,
5388 SmallVectorImpl<SDNode *> &Created) const {
5389 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
5390 // - D must be constant, with D = D0 * 2^K where D0 is odd
5391 // - P is the multiplicative inverse of D0 modulo 2^W
5392 // - Q = floor(((2^W) - 1) / D)
5393 // where W is the width of the common type of N and D.
5394 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5395 "Only applicable for (in)equality comparisons.");
5396
5397 SelectionDAG &DAG = DCI.DAG;
5398
5399 EVT VT = REMNode.getValueType();
5400 EVT SVT = VT.getScalarType();
5401 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5402 EVT ShSVT = ShVT.getScalarType();
5403
5404 // If MUL is unavailable, we cannot proceed in any case.
5405 if (!isOperationLegalOrCustom(ISD::MUL, VT))
5406 return SDValue();
5407
5408 bool ComparingWithAllZeros = true;
5409 bool AllComparisonsWithNonZerosAreTautological = true;
5410 bool HadTautologicalLanes = false;
5411 bool AllLanesAreTautological = true;
5412 bool HadEvenDivisor = false;
5413 bool AllDivisorsArePowerOfTwo = true;
5414 bool HadTautologicalInvertedLanes = false;
5415 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
5416
5417 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
5418 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5419 if (CDiv->isNullValue())
5420 return false;
5421
5422 const APInt &D = CDiv->getAPIntValue();
5423 const APInt &Cmp = CCmp->getAPIntValue();
5424
5425 ComparingWithAllZeros &= Cmp.isNullValue();
5426
5427 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5428 // if C2 is not less than C1, the comparison is always false.
5429 // But we will only be able to produce the comparison that will give the
5430 // opposive tautological answer. So this lane would need to be fixed up.
5431 bool TautologicalInvertedLane = D.ule(Cmp);
5432 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
5433
5434 // If all lanes are tautological (either all divisors are ones, or divisor
5435 // is not greater than the constant we are comparing with),
5436 // we will prefer to avoid the fold.
5437 bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
5438 HadTautologicalLanes |= TautologicalLane;
5439 AllLanesAreTautological &= TautologicalLane;
5440
5441 // If we are comparing with non-zero, we need'll need to subtract said
5442 // comparison value from the LHS. But there is no point in doing that if
5443 // every lane where we are comparing with non-zero is tautological..
5444 if (!Cmp.isNullValue())
5445 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
5446
5447 // Decompose D into D0 * 2^K
5448 unsigned K = D.countTrailingZeros();
5449 assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5450 APInt D0 = D.lshr(K);
5451
5452 // D is even if it has trailing zeros.
5453 HadEvenDivisor |= (K != 0);
5454 // D is a power-of-two if D0 is one.
5455 // If all divisors are power-of-two, we will prefer to avoid the fold.
5456 AllDivisorsArePowerOfTwo &= D0.isOneValue();
5457
5458 // P = inv(D0, 2^W)
5459 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5460 unsigned W = D.getBitWidth();
5461 APInt P = D0.zext(W + 1)
5462 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5463 .trunc(W);
5464 assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5465 assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5466
5467 // Q = floor((2^W - 1) u/ D)
5468 // R = ((2^W - 1) u% D)
5469 APInt Q, R;
5470 APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
5471
5472 // If we are comparing with zero, then that comparison constant is okay,
5473 // else it may need to be one less than that.
5474 if (Cmp.ugt(R))
5475 Q -= 1;
5476
5477 assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5478 "We are expecting that K is always less than all-ones for ShSVT");
5479
5480 // If the lane is tautological the result can be constant-folded.
5481 if (TautologicalLane) {
5482 // Set P and K amount to a bogus values so we can try to splat them.
5483 P = 0;
5484 K = -1;
5485 // And ensure that comparison constant is tautological,
5486 // it will always compare true/false.
5487 Q = -1;
5488 }
5489
5490 PAmts.push_back(DAG.getConstant(P, DL, SVT));
5491 KAmts.push_back(
5492 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5493 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5494 return true;
5495 };
5496
5497 SDValue N = REMNode.getOperand(0);
5498 SDValue D = REMNode.getOperand(1);
5499
5500 // Collect the values from each element.
5501 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
5502 return SDValue();
5503
5504 // If all lanes are tautological, the result can be constant-folded.
5505 if (AllLanesAreTautological)
5506 return SDValue();
5507
5508 // If this is a urem by a powers-of-two, avoid the fold since it can be
5509 // best implemented as a bit test.
5510 if (AllDivisorsArePowerOfTwo)
5511 return SDValue();
5512
5513 SDValue PVal, KVal, QVal;
5514 if (VT.isVector()) {
5515 if (HadTautologicalLanes) {
5516 // Try to turn PAmts into a splat, since we don't care about the values
5517 // that are currently '0'. If we can't, just keep '0'`s.
5518 turnVectorIntoSplatVector(PAmts, isNullConstant);
5519 // Try to turn KAmts into a splat, since we don't care about the values
5520 // that are currently '-1'. If we can't, change them to '0'`s.
5521 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5522 DAG.getConstant(0, DL, ShSVT));
5523 }
5524
5525 PVal = DAG.getBuildVector(VT, DL, PAmts);
5526 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5527 QVal = DAG.getBuildVector(VT, DL, QAmts);
5528 } else {
5529 PVal = PAmts[0];
5530 KVal = KAmts[0];
5531 QVal = QAmts[0];
5532 }
5533
5534 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5535 if (!isOperationLegalOrCustom(ISD::SUB, VT))
5536 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
5537 assert(CompTargetNode.getValueType() == N.getValueType() &&
5538 "Expecting that the types on LHS and RHS of comparisons match.");
5539 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
5540 }
5541
5542 // (mul N, P)
5543 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5544 Created.push_back(Op0.getNode());
5545
5546 // Rotate right only if any divisor was even. We avoid rotates for all-odd
5547 // divisors as a performance improvement, since rotating by 0 is a no-op.
5548 if (HadEvenDivisor) {
5549 // We need ROTR to do this.
5550 if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5551 return SDValue();
5552 SDNodeFlags Flags;
5553 Flags.setExact(true);
5554 // UREM: (rotr (mul N, P), K)
5555 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5556 Created.push_back(Op0.getNode());
5557 }
5558
5559 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5560 SDValue NewCC =
5561 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5562 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5563 if (!HadTautologicalInvertedLanes)
5564 return NewCC;
5565
5566 // If any lanes previously compared always-false, the NewCC will give
5567 // always-true result for them, so we need to fixup those lanes.
5568 // Or the other way around for inequality predicate.
5569 assert(VT.isVector() && "Can/should only get here for vectors.");
5570 Created.push_back(NewCC.getNode());
5571
5572 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5573 // if C2 is not less than C1, the comparison is always false.
5574 // But we have produced the comparison that will give the
5575 // opposive tautological answer. So these lanes would need to be fixed up.
5576 SDValue TautologicalInvertedChannels =
5577 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
5578 Created.push_back(TautologicalInvertedChannels.getNode());
5579
5580 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
5581 // If we have a vector select, let's replace the comparison results in the
5582 // affected lanes with the correct tautological result.
5583 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
5584 DL, SETCCVT, SETCCVT);
5585 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
5586 Replacement, NewCC);
5587 }
5588
5589 // Else, we can just invert the comparison result in the appropriate lanes.
5590 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
5591 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
5592 TautologicalInvertedChannels);
5593
5594 return SDValue(); // Don't know how to lower.
5595}
5596
5597/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5598/// where the divisor is constant and the comparison target is zero,
5599/// return a DAG expression that will generate the same comparison result
5600/// using only multiplications, additions and shifts/rotations.
5601/// Ref: "Hacker's Delight" 10-17.
5602SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5603 SDValue CompTargetNode,
5604 ISD::CondCode Cond,
5605 DAGCombinerInfo &DCI,
5606 const SDLoc &DL) const {
5607 SmallVector<SDNode *, 7> Built;
5608 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5609 DCI, DL, Built)) {
5610 assert(Built.size() <= 7 && "Max size prediction failed.");
5611 for (SDNode *N : Built)
5612 DCI.AddToWorklist(N);
5613 return Folded;
5614 }
5615
5616 return SDValue();
5617}
5618
5619SDValue
5620TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5621 SDValue CompTargetNode, ISD::CondCode Cond,
5622 DAGCombinerInfo &DCI, const SDLoc &DL,
5623 SmallVectorImpl<SDNode *> &Created) const {
5624 // Fold:
5625 // (seteq/ne (srem N, D), 0)
5626 // To:
5627 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
5628 //
5629 // - D must be constant, with D = D0 * 2^K where D0 is odd
5630 // - P is the multiplicative inverse of D0 modulo 2^W
5631 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
5632 // - Q = floor((2 * A) / (2^K))
5633 // where W is the width of the common type of N and D.
5634 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5635 "Only applicable for (in)equality comparisons.");
5636
5637 SelectionDAG &DAG = DCI.DAG;
5638
5639 EVT VT = REMNode.getValueType();
5640 EVT SVT = VT.getScalarType();
5641 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5642 EVT ShSVT = ShVT.getScalarType();
5643
5644 // If MUL is unavailable, we cannot proceed in any case.
5645 if (!isOperationLegalOrCustom(ISD::MUL, VT))
5646 return SDValue();
5647
5648 // TODO: Could support comparing with non-zero too.
5649 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
5650 if (!CompTarget || !CompTarget->isNullValue())
5651 return SDValue();
5652
5653 bool HadIntMinDivisor = false;
5654 bool HadOneDivisor = false;
5655 bool AllDivisorsAreOnes = true;
5656 bool HadEvenDivisor = false;
5657 bool NeedToApplyOffset = false;
5658 bool AllDivisorsArePowerOfTwo = true;
5659 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
5660
5661 auto BuildSREMPattern = [&](ConstantSDNode *C) {
5662 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5663 if (C->isNullValue())
5664 return false;
5665
5666 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
5667
5668 // WARNING: this fold is only valid for positive divisors!
5669 APInt D = C->getAPIntValue();
5670 if (D.isNegative())
5671 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
5672
5673 HadIntMinDivisor |= D.isMinSignedValue();
5674
5675 // If all divisors are ones, we will prefer to avoid the fold.
5676 HadOneDivisor |= D.isOneValue();
5677 AllDivisorsAreOnes &= D.isOneValue();
5678
5679 // Decompose D into D0 * 2^K
5680 unsigned K = D.countTrailingZeros();
5681 assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5682 APInt D0 = D.lshr(K);
5683
5684 if (!D.isMinSignedValue()) {
5685 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
5686 // we don't care about this lane in this fold, we'll special-handle it.
5687 HadEvenDivisor |= (K != 0);
5688 }
5689
5690 // D is a power-of-two if D0 is one. This includes INT_MIN.
5691 // If all divisors are power-of-two, we will prefer to avoid the fold.
5692 AllDivisorsArePowerOfTwo &= D0.isOneValue();
5693
5694 // P = inv(D0, 2^W)
5695 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5696 unsigned W = D.getBitWidth();
5697 APInt P = D0.zext(W + 1)
5698 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5699 .trunc(W);
5700 assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5701 assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5702
5703 // A = floor((2^(W - 1) - 1) / D0) & -2^K
5704 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
5705 A.clearLowBits(K);
5706
5707 if (!D.isMinSignedValue()) {
5708 // If divisor INT_MIN, then we don't care about this lane in this fold,
5709 // we'll special-handle it.
5710 NeedToApplyOffset |= A != 0;
5711 }
5712
5713 // Q = floor((2 * A) / (2^K))
5714 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
5715
5716 assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
5717 "We are expecting that A is always less than all-ones for SVT");
5718 assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5719 "We are expecting that K is always less than all-ones for ShSVT");
5720
5721 // If the divisor is 1 the result can be constant-folded. Likewise, we
5722 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
5723 if (D.isOneValue()) {
5724 // Set P, A and K to a bogus values so we can try to splat them.
5725 P = 0;
5726 A = -1;
5727 K = -1;
5728
5729 // x ?% 1 == 0 <--> true <--> x u<= -1
5730 Q = -1;
5731 }
5732
5733 PAmts.push_back(DAG.getConstant(P, DL, SVT));
5734 AAmts.push_back(DAG.getConstant(A, DL, SVT));
5735 KAmts.push_back(
5736 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5737 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5738 return true;
5739 };
5740
5741 SDValue N = REMNode.getOperand(0);
5742 SDValue D = REMNode.getOperand(1);
5743
5744 // Collect the values from each element.
5745 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
5746 return SDValue();
5747
5748 // If this is a srem by a one, avoid the fold since it can be constant-folded.
5749 if (AllDivisorsAreOnes)
5750 return SDValue();
5751
5752 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
5753 // since it can be best implemented as a bit test.
5754 if (AllDivisorsArePowerOfTwo)
5755 return SDValue();
5756
5757 SDValue PVal, AVal, KVal, QVal;
5758 if (VT.isFixedLengthVector()) {
5759 if (HadOneDivisor) {
5760 // Try to turn PAmts into a splat, since we don't care about the values
5761 // that are currently '0'. If we can't, just keep '0'`s.
5762 turnVectorIntoSplatVector(PAmts, isNullConstant);
5763 // Try to turn AAmts into a splat, since we don't care about the
5764 // values that are currently '-1'. If we can't, change them to '0'`s.
5765 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
5766 DAG.getConstant(0, DL, SVT));
5767 // Try to turn KAmts into a splat, since we don't care about the values
5768 // that are currently '-1'. If we can't, change them to '0'`s.
5769 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5770 DAG.getConstant(0, DL, ShSVT));
5771 }
5772
5773 PVal = DAG.getBuildVector(VT, DL, PAmts);
5774 AVal = DAG.getBuildVector(VT, DL, AAmts);
5775 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5776 QVal = DAG.getBuildVector(VT, DL, QAmts);
5777 } else if (VT.isScalableVector()) {
5778 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
5779 QAmts.size() == 1 &&
5780 "Expected matchUnaryPredicate to return one element for scalable "
5781 "vectors");
5782 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
5783 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
5784 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
5785 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
5786 } else {
5787 PVal = PAmts[0];
5788 AVal = AAmts[0];
5789 KVal = KAmts[0];
5790 QVal = QAmts[0];
5791 }
5792
5793 // (mul N, P)
5794 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5795 Created.push_back(Op0.getNode());
5796
5797 if (NeedToApplyOffset) {
5798 // We need ADD to do this.
5799 if (!isOperationLegalOrCustom(ISD::ADD, VT))
5800 return SDValue();
5801
5802 // (add (mul N, P), A)
5803 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
5804 Created.push_back(Op0.getNode());
5805 }
5806
5807 // Rotate right only if any divisor was even. We avoid rotates for all-odd
5808 // divisors as a performance improvement, since rotating by 0 is a no-op.
5809 if (HadEvenDivisor) {
5810 // We need ROTR to do this.
5811 if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5812 return SDValue();
5813 SDNodeFlags Flags;
5814 Flags.setExact(true);
5815 // SREM: (rotr (add (mul N, P), A), K)
5816 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5817 Created.push_back(Op0.getNode());
5818 }
5819
5820 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
5821 SDValue Fold =
5822 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5823 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5824
5825 // If we didn't have lanes with INT_MIN divisor, then we're done.
5826 if (!HadIntMinDivisor)
5827 return Fold;
5828
5829 // That fold is only valid for positive divisors. Which effectively means,
5830 // it is invalid for INT_MIN divisors. So if we have such a lane,
5831 // we must fix-up results for said lanes.
5832 assert(VT.isVector() && "Can/should only get here for vectors.");
5833
5834 if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
5835 !isOperationLegalOrCustom(ISD::AND, VT) ||
5836 !isOperationLegalOrCustom(Cond, VT) ||
5837 !isOperationLegalOrCustom(ISD::VSELECT, VT))
5838 return SDValue();
5839
5840 Created.push_back(Fold.getNode());
5841
5842 SDValue IntMin = DAG.getConstant(
5843 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
5844 SDValue IntMax = DAG.getConstant(
5845 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
5846 SDValue Zero =
5847 DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
5848
5849 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
5850 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
5851 Created.push_back(DivisorIsIntMin.getNode());
5852
5853 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
5854 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
5855 Created.push_back(Masked.getNode());
5856 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
5857 Created.push_back(MaskedIsZero.getNode());
5858
5859 // To produce final result we need to blend 2 vectors: 'SetCC' and
5860 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
5861 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
5862 // constant-folded, select can get lowered to a shuffle with constant mask.
5863 SDValue Blended =
5864 DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
5865
5866 return Blended;
5867}
5868
5869bool TargetLowering::
5870verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5871 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5872 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5873 "be a constant integer");
5874 return true;
5875 }
5876
5877 return false;
5878}
5879
5880SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
5881 const DenormalMode &Mode) const {
5882 SDLoc DL(Op);
5883 EVT VT = Op.getValueType();
5884 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5885 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5886 // Testing it with denormal inputs to avoid wrong estimate.
5887 if (Mode.Input == DenormalMode::IEEE) {
5888 // This is specifically a check for the handling of denormal inputs,
5889 // not the result.
5890
5891 // Test = fabs(X) < SmallestNormal
5892 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
5893 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
5894 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
5895 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
5896 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
5897 }
5898 // Test = X == 0.0
5899 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
5900}
5901
/// Try to build an expression equal to the negation of \p Op without placing
/// an explicit FNEG on top of it.
///
/// \param Op         The value to negate.
/// \param DAG        The DAG in which new nodes are created; temporaries that
///                   end up unused are removed from it again.
/// \param LegalOps   When true, constants are only negated if the target
///                   reports the (negated) constant as legal, and FADD is only
///                   rewritten if FSUB is legal or custom.
/// \param OptForSize Forwarded to isFPImmLegal when querying negated
///                   constants.
/// \param Cost       Output. Written on the paths that return a value:
///                   Cheaper, Neutral, or the cost reported by the recursive
///                   call whose result was used.
/// \param Depth      Current recursion depth; the search stops once it exceeds
///                   SelectionDAG::MaxRecursionDepth.
/// \returns the negated expression, or a null SDValue when no acceptable form
///          was found.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  // Constants are exempt here; they get their own use check below. The only
  // other exemption is an FP_EXTEND the target reports as free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper: delete a speculatively built negation that ended up with no users.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have a use of the negated floating constant, it is free
    // to negate it even if it has multiple uses. Otherwise (multi-use constant
    // whose negation is not used anywhere yet) give up.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // Legal either because constants and BUILD_VECTOR are legal for VT, or
    // because every (non-undef) negated element is a legal FP immediate.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with the sign of every constant flipped; undef
    // elements are passed through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // Only rewritten when signed zeros may be ignored (globally or via the
    // node's flags).
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      // The unused alternative is dropped, unless it is the very node that is
      // being returned.
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    // As for FADD, only rewritten when signed zeros may be ignored.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // The addend Z must be negatable; in addition exactly one of the two
    // multiplicands X / Y is negated.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      // The reported cost is the smaller of the multiplicand and addend costs.
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  // Unary operations: negate the operand and re-apply the same operation.
  // Cost is whatever the recursive call reported.
  case ISD::FP_EXTEND:
  case ISD::FSIN:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same as above, but FP_ROUND carries a second operand that is passed
    // through unchanged.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  // No negated form found.
  return SDValue();
}
6170
6171//===----------------------------------------------------------------------===//
6172// Legalization Utilities
6173//===----------------------------------------------------------------------===//
6174
/// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type \p VT into operations on the
/// narrower type \p HiLoVT, appending the pieces of the product to \p Result.
///
/// \param Opcode  ISD::MUL, ISD::UMUL_LOHI or ISD::SMUL_LOHI (asserted).
/// \param VT      Type of the operands \p LHS and \p RHS.
/// \param dl      Debug location used for every created node.
/// \param LHS,RHS The multiplicands.
/// \param Result  Receives the HiLoVT-typed pieces, least significant first:
///                two values for ISD::MUL, four for the *_LOHI opcodes.
///                NOTE: on some `return false` paths a piece has already been
///                appended, so callers should not rely on Result being
///                untouched after a failure.
/// \param HiLoVT  Type of the pieces.
/// \param DAG     The DAG in which nodes are created.
/// \param Kind    MulExpansionKind::Always treats MULHS/MULHU/SMUL_LOHI/
///                UMUL_LOHI as available without querying legality.
/// \param LL,LH,RL,RH Optional pre-split low/high halves of LHS and RHS;
///                must be either all set or all null (asserted).
/// \returns true if the expansion was emitted, false otherwise.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which HiLoVT-wide multiply primitives may be used.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  // Helper: emit the full HiLoVT x HiLoVT product of L and R into Lo/Hi,
  // preferring a single *MUL_LOHI node and falling back to MUL + MULH*.
  // Returns false if neither form is available for the requested signedness.
  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Derive the low halves by truncation if the caller did not supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: the top InnerBitSize bits of both inputs are known zero, so a
  // single unsigned low x low product is the whole result.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // The *_LOHI opcodes produce four pieces; the upper two are zero here.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path (scalar MUL only): both inputs have more than InnerBitSize sign
  // bits, so a single signed low x low product is the whole result.
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Derive the high halves (shift right, then truncate) if the caller did not
  // supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General case: start with the unsigned low x low partial product.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  // Its low half is the least significant piece of the result.
  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Only the low OuterBitSize bits are needed, so the cross terms just
    // contribute their low halves to the high piece:
    //   Hi = hi(LL*RL) + LL*RH + LH*RL
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Helper: recombine a (Lo, Hi) pair of HiLoVT values into one VT value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  // Next accumulates the running sum, starting from hi(LL*RL) and adding the
  // first cross term LL*RH.
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  // Second cross term LH*RL; this addition can carry, so the carry is tracked.
  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Use the glue-based ADDC/ADDE pair when the target has both for VT,
  // otherwise ADDCARRY with an explicit boolean carry.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  // The low half of the sum is the second piece; the remainder is shifted down
  // and carried into the next position.
  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // High x high partial product; signed for SMUL_LOHI.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the carry from the cross-term addition into its high half.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed correction: the cross terms above were computed as unsigned
    // products, so subtract the other operand's low half whenever a high half
    // is negative.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // The two halves of the final sum are the third and fourth pieces.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6356
6357bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6358 SelectionDAG &DAG, MulExpansionKind Kind,
6359 SDValue LL, SDValue LH, SDValue RL,
6360 SDValue RH) const {
6361 SmallVector<SDValue, 2> Result;
6362 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
6363 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6364 DAG, Kind, LL, LH, RL, RH);
6365 if (Ok) {
6366 assert(Result.size() == 2);
6367 Lo = Result[0];
6368 Hi = Result[1];
6369 }
6370 return Ok;
6371}
6372
6373// Check that (every element of) Z is undef or not an exact multiple of BW.
6374static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
6375 return ISD::matchUnaryPredicate(
6376 Z,
6377 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
6378 true);
6379}
6380
6381bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
6382 SelectionDAG &DAG) const {
6383 EVT VT = Node->getValueType(0);
6384
6385 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6386 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6387 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6388 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6389 return false;
6390
6391 SDValue X = Node->getOperand(0);
6392 SDValue Y = Node->getOperand(1);
6393 SDValue Z = Node->getOperand(2);
6394
6395 unsigned BW = VT.getScalarSizeInBits();
6396 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
6397 SDLoc DL(SDValue(Node, 0));
6398
6399 EVT ShVT = Z.getValueType();
6400
6401 // If a funnel shift in the other direction is more supported, use it.
6402 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
6403 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
6404 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
6405 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
6406 // fshl X, Y, Z -> fshr X, Y, -Z
6407 // fshr X, Y, Z -> fshl X, Y, -Z
6408 SDValue Zero = DAG.getConstant(0, DL, ShVT);
6409 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
6410 } else {
6411 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6412 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6413 SDValue One = DAG.getConstant(1, DL, ShVT);
6414 if (IsFSHL) {
6415 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
6416 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
6417 } else {
6418 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
6419 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
6420 }
6421 Z = DAG.getNOT(DL, Z, ShVT);
6422 }
6423 Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
6424 return true;
6425 }
6426
6427 SDValue ShX, ShY;
6428 SDValue ShAmt, InvShAmt;
6429 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
6430 // fshl: X << C | Y >> (BW - C)
6431 // fshr: X << (BW - C) | Y >> C
6432 // where C = Z % BW is not zero
6433 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
6434 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
6435 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
6436 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
6437 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
6438 } else {
6439 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6440 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6441 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
6442 if (isPowerOf2_32(BW)) {
6443 // Z % BW -> Z & (BW - 1)
6444 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
6445 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6446 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
6447 } else {
6448 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
6449 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
6450 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
6451 }
6452
6453 SDValue One = DAG.getConstant(1, DL, ShVT);
6454 if (IsFSHL) {
6455 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
6456 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
6457 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
6458 } else {
6459 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
6460 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
6461 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
6462 }
6463 }
6464 Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
6465 return true;
6466}
6467
6468// TODO: Merge with expandFunnelShift.
6469bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
6470 SDValue &Result, SelectionDAG &DAG) const {
6471 EVT VT = Node->getValueType(0);
6472 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6473 bool IsLeft = Node->getOpcode() == ISD::ROTL;
6474 SDValue Op0 = Node->getOperand(0);
6475 SDValue Op1 = Node->getOperand(1);
6476 SDLoc DL(SDValue(Node, 0));
6477
6478 EVT ShVT = Op1.getValueType();
6479 SDValue Zero = DAG.getConstant(0, DL, ShVT);
6480
6481 // If a rotate in the other direction is supported, use it.
6482 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
6483 if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
6484 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
6485 Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
6486 return true;
6487 }
6488
6489 if (!AllowVectorOps && VT.isVector() &&
6490 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
6491 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6492 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6493 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
6494 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6495 return false;
6496
6497 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
6498 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
6499 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
6500 SDValue ShVal;
6501 SDValue HsVal;
6502 if (isPowerOf2_32(EltSizeInBits)) {
6503 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6504 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6505 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
6506 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
6507 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
6508 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
6509 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
6510 } else {
6511 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6512 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6513 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
6514 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
6515 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
6516 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
6517 SDValue One = DAG.getConstant(1, DL, ShVT);
6518 HsVal =
6519 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
6520 }
6521 Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
6522 return true;
6523}
6524
/// Expand an f32 -> i64 FP_TO_SINT into integer bit manipulation on the
/// float's bit pattern.
///
/// \param Node    The conversion node. For strict opcodes the source is read
///                from operand 1, otherwise from operand 0.
/// \param Result  Receives the converted value on success.
/// \param DAG     The DAG in which nodes are created.
/// \returns false (emitting nothing) for strict opcodes and for any type pair
///          other than f32 -> i64; true once \p Result has been set.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // Constants describing the f32 bit layout: exponent field mask and position,
  // exponent bias, sign bit mask and position, and mantissa field mask.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as an integer of the same width.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Exponent = ((Bits & ExponentMask) >> 23) - Bias
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = (Bits & SignMask) >>s 31, i.e. 0 for non-negative inputs and
  // all-ones for negative ones, then sign-extended to the destination type.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // R = mantissa field with bit 23 (0x00800000) set on top of it.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale by the exponent: shift left by (Exponent - 23) when Exponent > 23,
  // otherwise shift right by (23 - Exponent).
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones and
  // leaves it unchanged when Sign is zero.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative exponent means |Src| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
6595
6596bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
6597 SDValue &Chain,
6598 SelectionDAG &DAG) const {
6599 SDLoc dl(SDValue(Node, 0));
6600 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
6601 SDValue Src = Node->getOperand(OpNo);
6602
6603 EVT SrcVT = Src.getValueType();
6604 EVT DstVT = Node->getValueType(0);
6605 EVT SetCCVT =
6606 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6607 EVT DstSetCCVT =
6608 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
6609
6610 // Only expand vector types if we have the appropriate vector bit operations.
6611 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
6612 ISD::FP_TO_SINT;
6613 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
6614 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
6615 return false;
6616
6617 // If the maximum float value is smaller then the signed integer range,
6618 // the destination signmask can't be represented by the float, so we can
6619 // just use FP_TO_SINT directly.
6620 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
6621 APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
6622 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
6623 if (APFloat::opOverflow &
6624 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
6625 if (Node->isStrictFPOpcode()) {
6626 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
6627 { Node->getOperand(0), Src });
6628 Chain = Result.getValue(1);
6629 } else
6630 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
6631 return true;
6632 }
6633
6634 // Don't expand it if there isn't cheap fsub instruction.
6635 if (!isOperationLegalOrCustom(
6636 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
6637 return false;
6638
6639 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
6640 SDValue Sel;
6641
6642 if (Node->isStrictFPOpcode()) {
6643 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
6644 Node->getOperand(0), /*IsSignaling*/ true);
6645 Chain = Sel.getValue(1);
6646 } else {
6647 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
6648 }
6649
6650 bool Strict = Node->isStrictFPOpcode() ||
6651 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
6652
6653 if (Strict) {
6654 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
6655 // signmask then offset (the result of which should be fully representable).
6656 // Sel = Src < 0x8000000000000000
6657 // FltOfs = select Sel, 0, 0x8000000000000000
6658 // IntOfs = select Sel, 0, 0x8000000000000000
6659 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
6660
6661 // TODO: Should any fast-math-flags be set for the FSUB?
6662 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
6663 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
6664 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
6665 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
6666 DAG.getConstant(0, dl, DstVT),
6667 DAG.getConstant(SignMask, dl, DstVT));
6668 SDValue SInt;
6669 if (Node->isStrictFPOpcode()) {
6670 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
6671 { Chain, Src, FltOfs });
6672 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
6673 { Val.getValue(1), Val });
6674 Chain = SInt.getValue(1);
6675 } else {
6676 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
6677 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
6678 }
6679 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
6680 } else {
6681 // Expand based on maximum range of FP_TO_SINT:
6682 // True = fp_to_sint(Src)
6683 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
6684 // Result = select (Src < 0x8000000000000000), True, False
6685
6686 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
6687 // TODO: Should any fast-math-flags be set for the FSUB?
6688 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
6689 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
6690 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
6691 DAG.getConstant(SignMask, dl, DstVT));
6692 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
6693 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
6694 }
6695 return true;
6696}
6697
6698bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
6699 SDValue &Chain,
6700 SelectionDAG &DAG) const {
6701 // This transform is not correct for converting 0 when rounding mode is set
6702 // to round toward negative infinity which will produce -0.0. So disable under
6703 // strictfp.
6704 if (Node->isStrictFPOpcode())
6705 return false;
6706
6707 SDValue Src = Node->getOperand(0);
6708 EVT SrcVT = Src.getValueType();
6709 EVT DstVT = Node->getValueType(0);
6710
6711 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
6712 return false;
6713
6714 // Only expand vector types if we have the appropriate vector bit operations.
6715 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
6716 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
6717 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
6718 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
6719 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
6720 return false;
6721
6722 SDLoc dl(SDValue(Node, 0));
6723 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
6724
6725 // Implementation of unsigned i64 to f64 following the algorithm in
6726 // __floatundidf in compiler_rt. This implementation performs rounding
6727 // correctly in all rounding modes with the exception of converting 0
6728 // when rounding toward negative infinity. In that case the fsub will produce
6729 // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
6730 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
6731 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
6732 BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
6733 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
6734 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
6735 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
6736
6737 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
6738 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
6739 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
6740 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
6741 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
6742 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
6743 SDValue HiSub =
6744 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
6745 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
6746 return true;
6747}
6748
6749SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
6750 SelectionDAG &DAG) const {
6751 SDLoc dl(Node);
6752 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
6753 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6754 EVT VT = Node->getValueType(0);
6755
6756 if (VT.isScalableVector())
6757 report_fatal_error(
6758 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
6759
6760 if (isOperationLegalOrCustom(NewOp, VT)) {
6761 SDValue Quiet0 = Node->getOperand(0);
6762 SDValue Quiet1 = Node->getOperand(1);
6763
6764 if (!Node->getFlags().hasNoNaNs()) {
6765 // Insert canonicalizes if it's possible we need to quiet to get correct
6766 // sNaN behavior.
6767 if (!DAG.isKnownNeverSNaN(Quiet0)) {
6768 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
6769 Node->getFlags());
6770 }
6771 if (!DAG.isKnownNeverSNaN(Quiet1)) {
6772 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
6773 Node->getFlags());
6774 }
6775 }
6776
6777 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
6778 }
6779
6780 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
6781 // instead if there are no NaNs.
6782 if (Node->getFlags().hasNoNaNs()) {
6783 unsigned IEEE2018Op =
6784 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
6785 if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
6786 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
6787 Node->getOperand(1), Node->getFlags());
6788 }
6789 }
6790
6791 // If none of the above worked, but there are no NaNs, then expand to
6792 // a compare/select sequence. This is required for correctness since
6793 // InstCombine might have canonicalized a fcmp+select sequence to a
6794 // FMINNUM/FMAXNUM node. If we were to fall through to the default
6795 // expansion to libcall, we might introduce a link-time dependency
6796 // on libm into a file that originally did not have one.
6797 if (Node->getFlags().hasNoNaNs()) {
6798 ISD::CondCode Pred =
6799 Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
6800 SDValue Op1 = Node->getOperand(0);
6801 SDValue Op2 = Node->getOperand(1);
6802 SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
6803 // Copy FMF flags, but always set the no-signed-zeros flag
6804 // as this is implied by the FMINNUM/FMAXNUM semantics.
6805 SDNodeFlags Flags = Node->getFlags();
6806 Flags.setNoSignedZeros(true);
6807 SelCC->setFlags(Flags);
6808 return SelCC;
6809 }
6810
6811 return SDValue();
6812}
6813
6814bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
6815 SelectionDAG &DAG) const {
6816 SDLoc dl(Node);
6817 EVT VT = Node->getValueType(0);
6818 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6819 SDValue Op = Node->getOperand(0);
6820 unsigned Len = VT.getScalarSizeInBits();
6821 assert(VT.isInteger() && "CTPOP not implemented for this type.");
6822
6823 // TODO: Add support for irregular type lengths.
6824 if (!(Len <= 128 && Len % 8 == 0))
6825 return false;
6826
6827 // Only expand vector types if we have the appropriate vector bit operations.
6828 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
6829 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6830 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6831 (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
6832 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
6833 return false;
6834
6835 // This is the "best" algorithm from
6836 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
6837 SDValue Mask55 =
6838 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
6839 SDValue Mask33 =
6840 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
6841 SDValue Mask0F =
6842 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
6843 SDValue Mask01 =
6844 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
6845
6846 // v = v - ((v >> 1) & 0x55555555...)
6847 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
6848 DAG.getNode(ISD::AND, dl, VT,
6849 DAG.getNode(ISD::SRL, dl, VT, Op,
6850 DAG.getConstant(1, dl, ShVT)),
6851 Mask55));
6852 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
6853 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
6854 DAG.getNode(ISD::AND, dl, VT,
6855 DAG.getNode(ISD::SRL, dl, VT, Op,
6856 DAG.getConstant(2, dl, ShVT)),
6857 Mask33));
6858 // v = (v + (v >> 4)) & 0x0F0F0F0F...
6859 Op = DAG.getNode(ISD::AND, dl, VT,
6860 DAG.getNode(ISD::ADD, dl, VT, Op,
6861 DAG.getNode(ISD::SRL, dl, VT, Op,
6862 DAG.getConstant(4, dl, ShVT))),
6863 Mask0F);
6864 // v = (v * 0x01010101...) >> (Len - 8)
6865 if (Len > 8)
6866 Op =
6867 DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
6868 DAG.getConstant(Len - 8, dl, ShVT));
6869
6870 Result = Op;
6871 return true;
6872}
6873
6874bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
6875 SelectionDAG &DAG) const {
6876 SDLoc dl(Node);
6877 EVT VT = Node->getValueType(0);
6878 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6879 SDValue Op = Node->getOperand(0);
6880 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6881
6882 // If the non-ZERO_UNDEF version is supported we can use that instead.
6883 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
6884 isOperationLegalOrCustom(ISD::CTLZ, VT)) {
6885 Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
6886 return true;
6887 }
6888
6889 // If the ZERO_UNDEF version is supported use that and handle the zero case.
6890 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
6891 EVT SetCCVT =
6892 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6893 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
6894 SDValue Zero = DAG.getConstant(0, dl, VT);
6895 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6896 Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6897 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
6898 return true;
6899 }
6900
6901 // Only expand vector types if we have the appropriate vector bit operations.
6902 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6903 !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
6904 !isOperationLegalOrCustom(ISD::SRL, VT) ||
6905 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6906 return false;
6907
6908 // for now, we do this:
6909 // x = x | (x >> 1);
6910 // x = x | (x >> 2);
6911 // ...
6912 // x = x | (x >>16);
6913 // x = x | (x >>32); // for 64-bit input
6914 // return popcount(~x);
6915 //
6916 // Ref: "Hacker's Delight" by Henry Warren
6917 for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6918 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6919 Op = DAG.getNode(ISD::OR, dl, VT, Op,
6920 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6921 }
6922 Op = DAG.getNOT(dl, Op, VT);
6923 Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6924 return true;
6925}
6926
6927bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6928 SelectionDAG &DAG) const {
6929 SDLoc dl(Node);
6930 EVT VT = Node->getValueType(0);
6931 SDValue Op = Node->getOperand(0);
6932 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6933
6934 // If the non-ZERO_UNDEF version is supported we can use that instead.
6935 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6936 isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6937 Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6938 return true;
6939 }
6940
6941 // If the ZERO_UNDEF version is supported use that and handle the zero case.
6942 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6943 EVT SetCCVT =
6944 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6945 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6946 SDValue Zero = DAG.getConstant(0, dl, VT);
6947 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6948 Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6949 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6950 return true;
6951 }
6952
6953 // Only expand vector types if we have the appropriate vector bit operations.
6954 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6955 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6956 !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6957 !isOperationLegalOrCustom(ISD::SUB, VT) ||
6958 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6959 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6960 return false;
6961
6962 // for now, we use: { return popcount(~x & (x - 1)); }
6963 // unless the target has ctlz but not ctpop, in which case we use:
6964 // { return 32 - nlz(~x & (x-1)); }
6965 // Ref: "Hacker's Delight" by Henry Warren
6966 SDValue Tmp = DAG.getNode(
6967 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6968 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6969
6970 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6971 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6972 Result =
6973 DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6974 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6975 return true;
6976 }
6977
6978 Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6979 return true;
6980}
6981
6982bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6983 SelectionDAG &DAG, bool IsNegative) const {
6984 SDLoc dl(N);
6985 EVT VT = N->getValueType(0);
6986 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6987 SDValue Op = N->getOperand(0);
6988
6989 // abs(x) -> smax(x,sub(0,x))
6990 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
6991 isOperationLegal(ISD::SMAX, VT)) {
6992 SDValue Zero = DAG.getConstant(0, dl, VT);
6993 Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
6994 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
6995 return true;
6996 }
6997
6998 // abs(x) -> umin(x,sub(0,x))
6999 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7000 isOperationLegal(ISD::UMIN, VT)) {
7001 SDValue Zero = DAG.getConstant(0, dl, VT);
7002 Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
7003 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7004 return true;
7005 }
7006
7007 // 0 - abs(x) -> smin(x, sub(0,x))
7008 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
7009 isOperationLegal(ISD::SMIN, VT)) {
7010 SDValue Zero = DAG.getConstant(0, dl, VT);
7011 Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
7012 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7013 return true;
7014 }
7015
7016 // Only expand vector types if we have the appropriate vector operations.
7017 if (VT.isVector() &&
7018 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
7019 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
7020 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
7021 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7022 return false;
7023
7024 SDValue Shift =
7025 DAG.getNode(ISD::SRA, dl, VT, Op,
7026 DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
7027 if (!IsNegative) {
7028 SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
7029 Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
7030 } else {
7031 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
7032 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
7033 Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
7034 }
7035 return true;
7036}
7037
7038std::pair<SDValue, SDValue>
7039TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
7040 SelectionDAG &DAG) const {
7041 SDLoc SL(LD);
7042 SDValue Chain = LD->getChain();
7043 SDValue BasePTR = LD->getBasePtr();
7044 EVT SrcVT = LD->getMemoryVT();
7045 EVT DstVT = LD->getValueType(0);
7046 ISD::LoadExtType ExtType = LD->getExtensionType();
7047
7048 if (SrcVT.isScalableVector())
7049 report_fatal_error("Cannot scalarize scalable vector loads");
7050
7051 unsigned NumElem = SrcVT.getVectorNumElements();
7052
7053 EVT SrcEltVT = SrcVT.getScalarType();
7054 EVT DstEltVT = DstVT.getScalarType();
7055
7056 // A vector must always be stored in memory as-is, i.e. without any padding
7057 // between the elements, since various code depend on it, e.g. in the
7058 // handling of a bitcast of a vector type to int, which may be done with a
7059 // vector store followed by an integer load. A vector that does not have
7060 // elements that are byte-sized must therefore be stored as an integer
7061 // built out of the extracted vector elements.
7062 if (!SrcEltVT.isByteSized()) {
7063 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
7064 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
7065
7066 unsigned NumSrcBits = SrcVT.getSizeInBits();
7067 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
7068
7069 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
7070 SDValue SrcEltBitMask = DAG.getConstant(
7071 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
7072
7073 // Load the whole vector and avoid masking off the top bits as it makes
7074 // the codegen worse.
7075 SDValue Load =
7076 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
7077 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
7078 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7079
7080 SmallVector<SDValue, 8> Vals;
7081 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
7082 unsigned ShiftIntoIdx =
7083 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
7084 SDValue ShiftAmount =
7085 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
7086 LoadVT, SL, /*LegalTypes=*/false);
7087 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
7088 SDValue Elt =
7089 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
7090 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
7091
7092 if (ExtType != ISD::NON_EXTLOAD) {
7093 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
7094 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
7095 }
7096
7097 Vals.push_back(Scalar);
7098 }
7099
7100 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
7101 return std::make_pair(Value, Load.getValue(1));
7102 }
7103
7104 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
7105 assert(SrcEltVT.isByteSized());
7106
7107 SmallVector<SDValue, 8> Vals;
7108 SmallVector<SDValue, 8> LoadChains;
7109
7110 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
7111 SDValue ScalarLoad =
7112 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
7113 LD->getPointerInfo().getWithOffset(Idx * Stride),
7114 SrcEltVT, LD->getOriginalAlign(),
7115 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7116
7117 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
7118
7119 Vals.push_back(ScalarLoad.getValue(0));
7120 LoadChains.push_back(ScalarLoad.getValue(1));
7121 }
7122
7123 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
7124 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
7125
7126 return std::make_pair(Value, NewChain);
7127}
7128
7129SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
7130 SelectionDAG &DAG) const {
7131 SDLoc SL(ST);
7132
7133 SDValue Chain = ST->getChain();
7134 SDValue BasePtr = ST->getBasePtr();
7135 SDValue Value = ST->getValue();
7136 EVT StVT = ST->getMemoryVT();
7137
7138 if (StVT.isScalableVector())
7139 report_fatal_error("Cannot scalarize scalable vector stores");
7140
7141 // The type of the data we want to save
7142 EVT RegVT = Value.getValueType();
7143 EVT RegSclVT = RegVT.getScalarType();
7144
7145 // The type of data as saved in memory.
7146 EVT MemSclVT = StVT.getScalarType();
7147
7148 unsigned NumElem = StVT.getVectorNumElements();
7149
7150 // A vector must always be stored in memory as-is, i.e. without any padding
7151 // between the elements, since various code depend on it, e.g. in the
7152 // handling of a bitcast of a vector type to int, which may be done with a
7153 // vector store followed by an integer load. A vector that does not have
7154 // elements that are byte-sized must therefore be stored as an integer
7155 // built out of the extracted vector elements.
7156 if (!MemSclVT.isByteSized()) {
7157 unsigned NumBits = StVT.getSizeInBits();
7158 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
7159
7160 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
7161
7162 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
7163 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
7164 DAG.getVectorIdxConstant(Idx, SL));
7165 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
7166 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
7167 unsigned ShiftIntoIdx =
7168 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
7169 SDValue ShiftAmount =
7170 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
7171 SDValue ShiftedElt =
7172 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
7173 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
7174 }
7175
7176 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
7177 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
7178 ST->getAAInfo());
7179 }
7180
7181 // Store Stride in bytes
7182 unsigned Stride = MemSclVT.getSizeInBits() / 8;
7183 assert(Stride && "Zero stride!");
7184 // Extract each of the elements from the original vector and save them into
7185 // memory individually.
7186 SmallVector<SDValue, 8> Stores;
7187 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
7188 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
7189 DAG.getVectorIdxConstant(Idx, SL));
7190
7191 SDValue Ptr =
7192 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
7193
7194 // This scalar TruncStore may be illegal, but we legalize it later.
7195 SDValue Store = DAG.getTruncStore(
7196 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
7197 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
7198 ST->getAAInfo());
7199
7200 Stores.push_back(Store);
7201 }
7202
7203 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
7204}
7205
7206std::pair<SDValue, SDValue>
7207TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
7208 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
7209 "unaligned indexed loads not implemented!");
7210 SDValue Chain = LD->getChain();
7211 SDValue Ptr = LD->getBasePtr();
7212 EVT VT = LD->getValueType(0);
7213 EVT LoadedVT = LD->getMemoryVT();
7214 SDLoc dl(LD);
7215 auto &MF = DAG.getMachineFunction();
7216
7217 if (VT.isFloatingPoint() || VT.isVector()) {
7218 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
7219 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
7220 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
7221 LoadedVT.isVector()) {
7222 // Scalarize the load and let the individual components be handled.
7223 return scalarizeVectorLoad(LD, DAG);
7224 }
7225
7226 // Expand to a (misaligned) integer load of the same size,
7227 // then bitconvert to floating point or vector.
7228 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
7229 LD->getMemOperand());
7230 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
7231 if (LoadedVT != VT)
7232 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
7233 ISD::ANY_EXTEND, dl, VT, Result);
7234
7235 return std::make_pair(Result, newLoad.getValue(1));
7236 }
7237
7238 // Copy the value to a (aligned) stack slot using (unaligned) integer
7239 // loads and stores, then do a (aligned) load from the stack slot.
7240 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
7241 unsigned LoadedBytes = LoadedVT.getStoreSize();
7242 unsigned RegBytes = RegVT.getSizeInBits() / 8;
7243 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
7244
7245 // Make sure the stack slot is also aligned for the register type.
7246 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
7247 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
7248 SmallVector<SDValue, 8> Stores;
7249 SDValue StackPtr = StackBase;
7250 unsigned Offset = 0;
7251
7252 EVT PtrVT = Ptr.getValueType();
7253 EVT StackPtrVT = StackPtr.getValueType();
7254
7255 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
7256 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
7257
7258 // Do all but one copies using the full register width.
7259 for (unsigned i = 1; i < NumRegs; i++) {
7260 // Load one integer register's worth from the original location.
7261 SDValue Load = DAG.getLoad(
7262 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
7263 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
7264 LD->getAAInfo());
7265 // Follow the load with a store to the stack slot. Remember the store.
7266 Stores.push_back(DAG.getStore(
7267 Load.getValue(1), dl, Load, StackPtr,
7268 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
7269 // Increment the pointers.
7270 Offset += RegBytes;
7271
7272 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
7273 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
7274 }
7275
7276 // The last copy may be partial. Do an extending load.
7277 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
7278 8 * (LoadedBytes - Offset));
7279 SDValue Load =
7280 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
7281 LD->getPointerInfo().getWithOffset(Offset), MemVT,
7282 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
7283 LD->getAAInfo());
7284 // Follow the load with a store to the stack slot. Remember the store.
7285 // On big-endian machines this requires a truncating store to ensure
7286 // that the bits end up in the right place.
7287 Stores.push_back(DAG.getTruncStore(
7288 Load.getValue(1), dl, Load, StackPtr,
7289 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
7290
7291 // The order of the stores doesn't matter - say it with a TokenFactor.
7292 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7293
7294 // Finally, perform the original load only redirected to the stack slot.
7295 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
7296 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
7297 LoadedVT);
7298
7299 // Callers expect a MERGE_VALUES node.
7300 return std::make_pair(Load, TF);
7301 }
7302
7303 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
7304 "Unaligned load of unsupported type.");
7305
7306 // Compute the new VT that is half the size of the old one. This is an
7307 // integer MVT.
7308 unsigned NumBits = LoadedVT.getSizeInBits();
7309 EVT NewLoadedVT;
7310 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
7311 NumBits >>= 1;
7312
7313 Align Alignment = LD->getOriginalAlign();
7314 unsigned IncrementSize = NumBits / 8;
7315 ISD::LoadExtType HiExtType = LD->getExtensionType();
7316
7317 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
7318 if (HiExtType == ISD::NON_EXTLOAD)
7319 HiExtType = ISD::ZEXTLOAD;
7320
7321 // Load the value in two parts
7322 SDValue Lo, Hi;
7323 if (DAG.getDataLayout().isLittleEndian()) {
7324 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
7325 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
7326 LD->getAAInfo());
7327
7328 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
7329 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
7330 LD->getPointerInfo().getWithOffset(IncrementSize),
7331 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
7332 LD->getAAInfo());
7333 } else {
7334 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
7335 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
7336 LD->getAAInfo());
7337
7338 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
7339 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
7340 LD->getPointerInfo().getWithOffset(IncrementSize),
7341 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
7342 LD->getAAInfo());
7343 }
7344
7345 // aggregate the two parts
7346 SDValue ShiftAmount =
7347 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
7348 DAG.getDataLayout()));
7349 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
7350 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
7351
7352 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
7353 Hi.getValue(1));
7354
7355 return std::make_pair(Result, TF);
7356}
7357
7358SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
7359 SelectionDAG &DAG) const {
7360 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
7361 "unaligned indexed stores not implemented!");
7362 SDValue Chain = ST->getChain();
7363 SDValue Ptr = ST->getBasePtr();
7364 SDValue Val = ST->getValue();
7365 EVT VT = Val.getValueType();
7366 Align Alignment = ST->getOriginalAlign();
7367 auto &MF = DAG.getMachineFunction();
7368 EVT StoreMemVT = ST->getMemoryVT();
7369
7370 SDLoc dl(ST);
7371 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
7372 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
7373 if (isTypeLegal(intVT)) {
7374 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
7375 StoreMemVT.isVector()) {
7376 // Scalarize the store and let the individual components be handled.
7377 SDValue Result = scalarizeVectorStore(ST, DAG);
7378 return Result;
7379 }
7380 // Expand to a bitconvert of the value to the integer type of the
7381 // same size, then a (misaligned) int store.
7382 // FIXME: Does not handle truncating floating point stores!
7383 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
7384 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
7385 Alignment, ST->getMemOperand()->getFlags());
7386 return Result;
7387 }
7388 // Do a (aligned) store to a stack slot, then copy from the stack slot
7389 // to the final destination using (unaligned) integer loads and stores.
7390 MVT RegVT = getRegisterType(
7391 *DAG.getContext(),
7392 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
7393 EVT PtrVT = Ptr.getValueType();
7394 unsigned StoredBytes = StoreMemVT.getStoreSize();
7395 unsigned RegBytes = RegVT.getSizeInBits() / 8;
7396 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
7397
7398 // Make sure the stack slot is also aligned for the register type.
7399 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
7400 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7401
7402 // Perform the original store, only redirected to the stack slot.
7403 SDValue Store = DAG.getTruncStore(
7404 Chain, dl, Val, StackPtr,
7405 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
7406
7407 EVT StackPtrVT = StackPtr.getValueType();
7408
7409 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
7410 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
7411 SmallVector<SDValue, 8> Stores;
7412 unsigned Offset = 0;
7413
7414 // Do all but one copies using the full register width.
7415 for (unsigned i = 1; i < NumRegs; i++) {
7416 // Load one integer register's worth from the stack slot.
7417 SDValue Load = DAG.getLoad(
7418 RegVT, dl, Store, StackPtr,
7419 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
7420 // Store it to the final location. Remember the store.
7421 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
7422 ST->getPointerInfo().getWithOffset(Offset),
7423 ST->getOriginalAlign(),
7424 ST->getMemOperand()->getFlags()));
7425 // Increment the pointers.
7426 Offset += RegBytes;
7427 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
7428 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
7429 }
7430
7431 // The last store may be partial. Do a truncating store. On big-endian
7432 // machines this requires an extending load from the stack slot to ensure
7433 // that the bits are in the right place.
7434 EVT LoadMemVT =
7435 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
7436
7437 // Load from the stack slot.
7438 SDValue Load = DAG.getExtLoad(
7439 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
7440 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
7441
7442 Stores.push_back(
7443 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
7444 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
7445 ST->getOriginalAlign(),
7446 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
7447 // The order of the stores doesn't matter - say it with a TokenFactor.
7448 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7449 return Result;
7450 }
7451
7452 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
7453 "Unaligned store of unknown type.");
7454 // Get the half-size VT
7455 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
7456 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
7457 unsigned IncrementSize = NumBits / 8;
7458
7459 // Divide the stored value in two parts.
7460 SDValue ShiftAmount = DAG.getConstant(
7461 NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
7462 SDValue Lo = Val;
7463 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
7464
7465 // Store the two parts
7466 SDValue Store1, Store2;
7467 Store1 = DAG.getTruncStore(Chain, dl,
7468 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
7469 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
7470 ST->getMemOperand()->getFlags());
7471
7472 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
7473 Store2 = DAG.getTruncStore(
7474 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
7475 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
7476 ST->getMemOperand()->getFlags(), ST->getAAInfo());
7477
7478 SDValue Result =
7479 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
7480 return Result;
7481}
7482
7483SDValue
7484TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
7485 const SDLoc &DL, EVT DataVT,
7486 SelectionDAG &DAG,
7487 bool IsCompressedMemory) const {
7488 SDValue Increment;
7489 EVT AddrVT = Addr.getValueType();
7490 EVT MaskVT = Mask.getValueType();
7491 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
7492 "Incompatible types of Data and Mask");
7493 if (IsCompressedMemory) {
7494 if (DataVT.isScalableVector())
7495 report_fatal_error(
7496 "Cannot currently handle compressed memory with scalable vectors");
7497 // Incrementing the pointer according to number of '1's in the mask.
7498 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
7499 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
7500 if (MaskIntVT.getSizeInBits() < 32) {
7501 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
7502 MaskIntVT = MVT::i32;
7503 }
7504
7505 // Count '1's with POPCNT.
7506 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
7507 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
7508 // Scale is an element size in bytes.
7509 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
7510 AddrVT);
7511 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
7512 } else if (DataVT.isScalableVector()) {
7513 Increment = DAG.getVScale(DL, AddrVT,
7514 APInt(AddrVT.getFixedSizeInBits(),
7515 DataVT.getStoreSize().getKnownMinSize()));
7516 } else
7517 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
7518
7519 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
7520}
7521
7522static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
7523 SDValue Idx,
7524 EVT VecVT,
7525 const SDLoc &dl) {
7526 if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
7527 return Idx;
7528
7529 EVT IdxVT = Idx.getValueType();
7530 unsigned NElts = VecVT.getVectorMinNumElements();
7531 if (VecVT.isScalableVector()) {
7532 SDValue VS = DAG.getVScale(dl, IdxVT,
7533 APInt(IdxVT.getFixedSizeInBits(),
7534 NElts));
7535 SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS,
7536 DAG.getConstant(1, dl, IdxVT));
7537
7538 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
7539 } else {
7540 if (isPowerOf2_32(NElts)) {
7541 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
7542 Log2_32(NElts));
7543 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
7544 DAG.getConstant(Imm, dl, IdxVT));
7545 }
7546 }
7547
7548 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
7549 DAG.getConstant(NElts - 1, dl, IdxVT));
7550}
7551
7552SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
7553 SDValue VecPtr, EVT VecVT,
7554 SDValue Index) const {
7555 SDLoc dl(Index);
7556 // Make sure the index type is big enough to compute in.
7557 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
7558
7559 EVT EltVT = VecVT.getVectorElementType();
7560
7561 // Calculate the element offset and add it to the pointer.
7562 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
7563 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
7564 "Converting bits to bytes lost precision");
7565
7566 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
7567
7568 EVT IdxVT = Index.getValueType();
7569
7570 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
7571 DAG.getConstant(EltSize, dl, IdxVT));
7572 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
7573}
7574
7575//===----------------------------------------------------------------------===//
7576// Implementation of Emulated TLS Model
7577//===----------------------------------------------------------------------===//
7578
7579SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
7580 SelectionDAG &DAG) const {
7581 // Access to address of TLS varialbe xyz is lowered to a function call:
7582 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
7583 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7584 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
7585 SDLoc dl(GA);
7586
7587 ArgListTy Args;
7588 ArgListEntry Entry;
7589 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
7590 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
7591 StringRef EmuTlsVarName(NameString);
7592 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
7593 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
7594 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
7595 Entry.Ty = VoidPtrType;
7596 Args.push_back(Entry);
7597
7598 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
7599
7600 TargetLowering::CallLoweringInfo CLI(DAG);
7601 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
7602 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
7603 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7604
7605 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
7606 // At last for X86 targets, maybe good for other targets too?
7607 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7608 MFI.setAdjustsStack(true); // Is this only for X86 target?
7609 MFI.setHasCalls(true);
7610
7611 assert((GA->getOffset() == 0) &&
7612 "Emulated TLS must have zero offset in GlobalAddressSDNode");
7613 return CallResult.first;
7614}
7615
7616SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
7617 SelectionDAG &DAG) const {
7618 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
7619 if (!isCtlzFast())
7620 return SDValue();
7621 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7622 SDLoc dl(Op);
7623 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7624 if (C->isNullValue() && CC == ISD::SETEQ) {
7625 EVT VT = Op.getOperand(0).getValueType();
7626 SDValue Zext = Op.getOperand(0);
7627 if (VT.bitsLT(MVT::i32)) {
7628 VT = MVT::i32;
7629 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
7630 }
7631 unsigned Log2b = Log2_32(VT.getSizeInBits());
7632 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
7633 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
7634 DAG.getConstant(Log2b, dl, MVT::i32));
7635 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
7636 }
7637 }
7638 return SDValue();
7639}
7640
7641// Convert redundant addressing modes (e.g. scaling is redundant
7642// when accessing bytes).
7643ISD::MemIndexType
7644TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
7645 SDValue Offsets) const {
7646 bool IsScaledIndex =
7647 (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
7648 bool IsSignedIndex =
7649 (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
7650
7651 // Scaling is unimportant for bytes, canonicalize to unscaled.
7652 if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
7653 IsScaledIndex = false;
7654 IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
7655 }
7656
7657 return IndexType;
7658}
7659
7660SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
7661 SDValue Op0 = Node->getOperand(0);
7662 SDValue Op1 = Node->getOperand(1);
7663 EVT VT = Op0.getValueType();
7664 unsigned Opcode = Node->getOpcode();
7665 SDLoc DL(Node);
7666
7667 // umin(x,y) -> sub(x,usubsat(x,y))
7668 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
7669 isOperationLegal(ISD::USUBSAT, VT)) {
7670 return DAG.getNode(ISD::SUB, DL, VT, Op0,
7671 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
7672 }
7673
7674 // umax(x,y) -> add(x,usubsat(y,x))
7675 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
7676 isOperationLegal(ISD::USUBSAT, VT)) {
7677 return DAG.getNode(ISD::ADD, DL, VT, Op0,
7678 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
7679 }
7680
7681 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
7682 ISD::CondCode CC;
7683 switch (Opcode) {
7684 default: llvm_unreachable("How did we get here?");
7685 case ISD::SMAX: CC = ISD::SETGT; break;
7686 case ISD::SMIN: CC = ISD::SETLT; break;
7687 case ISD::UMAX: CC = ISD::SETUGT; break;
7688 case ISD::UMIN: CC = ISD::SETULT; break;
7689 }
7690
7691 // FIXME: Should really try to split the vector in case it's legal on a
7692 // subvector.
7693 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
7694 return DAG.UnrollVectorOp(Node);
7695
7696 SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
7697 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
7698}
7699
7700SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
7701 unsigned Opcode = Node->getOpcode();
7702 SDValue LHS = Node->getOperand(0);
7703 SDValue RHS = Node->getOperand(1);
7704 EVT VT = LHS.getValueType();
7705 SDLoc dl(Node);
7706
7707 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
7708 assert(VT.isInteger() && "Expected operands to be integers");
7709
7710 // usub.sat(a, b) -> umax(a, b) - b
7711 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
7712 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
7713 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
7714 }
7715
7716 // uadd.sat(a, b) -> umin(a, ~b) + b
7717 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
7718 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
7719 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
7720 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
7721 }
7722
7723 unsigned OverflowOp;
7724 switch (Opcode) {
7725 case ISD::SADDSAT:
7726 OverflowOp = ISD::SADDO;
7727 break;
7728 case ISD::UADDSAT:
7729 OverflowOp = ISD::UADDO;
7730 break;
7731 case ISD::SSUBSAT:
7732 OverflowOp = ISD::SSUBO;
7733 break;
7734 case ISD::USUBSAT:
7735 OverflowOp = ISD::USUBO;
7736 break;
7737 default:
7738 llvm_unreachable("Expected method to receive signed or unsigned saturation "
7739 "addition or subtraction node.");
7740 }
7741
7742 // FIXME: Should really try to split the vector in case it's legal on a
7743 // subvector.
7744 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
7745 return DAG.UnrollVectorOp(Node);
7746
7747 unsigned BitWidth = LHS.getScalarValueSizeInBits();
7748 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7749 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
7750 LHS, RHS);
7751 SDValue SumDiff = Result.getValue(0);
7752 SDValue Overflow = Result.getValue(1);
7753 SDValue Zero = DAG.getConstant(0, dl, VT);
7754 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
7755
7756 if (Opcode == ISD::UADDSAT) {
7757 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
7758 // (LHS + RHS) | OverflowMask
7759 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
7760 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
7761 }
7762 // Overflow ? 0xffff.... : (LHS + RHS)
7763 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
7764 } else if (Opcode == ISD::USUBSAT) {
7765 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
7766 // (LHS - RHS) & ~OverflowMask
7767 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
7768 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
7769 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
7770 }
7771 // Overflow ? 0 : (LHS - RHS)
7772 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
7773 } else {
7774 // SatMax -> Overflow && SumDiff < 0
7775 // SatMin -> Overflow && SumDiff >= 0
7776 APInt MinVal = APInt::getSignedMinValue(BitWidth);
7777 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
7778 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
7779 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7780 SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
7781 Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
7782 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
7783 }
7784}
7785
7786SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
7787 unsigned Opcode = Node->getOpcode();
7788 bool IsSigned = Opcode == ISD::SSHLSAT;
7789 SDValue LHS = Node->getOperand(0);
7790 SDValue RHS = Node->getOperand(1);
7791 EVT VT = LHS.getValueType();
7792 SDLoc dl(Node);
7793
7794 assert((Node->getOpcode() == ISD::SSHLSAT ||
7795 Node->getOpcode() == ISD::USHLSAT) &&
7796 "Expected a SHLSAT opcode");
7797 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
7798 assert(VT.isInteger() && "Expected operands to be integers");
7799
7800 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
7801
7802 unsigned BW = VT.getScalarSizeInBits();
7803 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
7804 SDValue Orig =
7805 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
7806
7807 SDValue SatVal;
7808 if (IsSigned) {
7809 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
7810 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
7811 SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
7812 SatMin, SatMax, ISD::SETLT);
7813 } else {
7814 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
7815 }
7816 Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
7817
7818 return Result;
7819}
7820
7821SDValue
7822TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
7823 assert((Node->getOpcode() == ISD::SMULFIX ||
7824 Node->getOpcode() == ISD::UMULFIX ||
7825 Node->getOpcode() == ISD::SMULFIXSAT ||
7826 Node->getOpcode() == ISD::UMULFIXSAT) &&
7827 "Expected a fixed point multiplication opcode");
7828
7829 SDLoc dl(Node);
7830 SDValue LHS = Node->getOperand(0);
7831 SDValue RHS = Node->getOperand(1);
7832 EVT VT = LHS.getValueType();
7833 unsigned Scale = Node->getConstantOperandVal(2);
7834 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
7835 Node->getOpcode() == ISD::UMULFIXSAT);
7836 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
7837 Node->getOpcode() == ISD::SMULFIXSAT);
7838 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7839 unsigned VTSize = VT.getScalarSizeInBits();
7840
7841 if (!Scale) {
7842 // [us]mul.fix(a, b, 0) -> mul(a, b)
7843 if (!Saturating) {
7844 if (isOperationLegalOrCustom(ISD::MUL, VT))
7845 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
7846 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
7847 SDValue Result =
7848 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
7849 SDValue Product = Result.getValue(0);
7850 SDValue Overflow = Result.getValue(1);
7851 SDValue Zero = DAG.getConstant(0, dl, VT);
7852
7853 APInt MinVal = APInt::getSignedMinValue(VTSize);
7854 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
7855 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
7856 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7857 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
7858 Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
7859 return DAG.getSelect(dl, VT, Overflow, Result, Product);
7860 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
7861 SDValue Result =
7862 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
7863 SDValue Product = Result.getValue(0);
7864 SDValue Overflow = Result.getValue(1);
7865
7866 APInt MaxVal = APInt::getMaxValue(VTSize);
7867 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
7868 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
7869 }
7870 }
7871
7872 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
7873 "Expected scale to be less than the number of bits if signed or at "
7874 "most the number of bits if unsigned.");
7875 assert(LHS.getValueType() == RHS.getValueType() &&
7876 "Expected both operands to be the same type");
7877
7878 // Get the upper and lower bits of the result.
7879 SDValue Lo, Hi;
7880 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
7881 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
7882 if (isOperationLegalOrCustom(LoHiOp, VT)) {
7883 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
7884 Lo = Result.getValue(0);
7885 Hi = Result.getValue(1);
7886 } else if (isOperationLegalOrCustom(HiOp, VT)) {
7887 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
7888 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
7889 } else if (VT.isVector()) {
7890 return SDValue();
7891 } else {
7892 report_fatal_error("Unable to expand fixed point multiplication.");
7893 }
7894
7895 if (Scale == VTSize)
7896 // Result is just the top half since we'd be shifting by the width of the
7897 // operand. Overflow impossible so this works for both UMULFIX and
7898 // UMULFIXSAT.
7899 return Hi;
7900
7901 // The result will need to be shifted right by the scale since both operands
7902 // are scaled. The result is given to us in 2 halves, so we only want part of
7903 // both in the result.
7904 EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
7905 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
7906 DAG.getConstant(Scale, dl, ShiftTy));
7907 if (!Saturating)
7908 return Result;
7909
7910 if (!Signed) {
7911 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
7912 // widened multiplication) aren't all zeroes.
7913
7914 // Saturate to max if ((Hi >> Scale) != 0),
7915 // which is the same as if (Hi > ((1 << Scale) - 1))
7916 APInt MaxVal = APInt::getMaxValue(VTSize);
7917 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
7918 dl, VT);
7919 Result = DAG.getSelectCC(dl, Hi, LowMask,
7920 DAG.getConstant(MaxVal, dl, VT), Result,
7921 ISD::SETUGT);
7922
7923 return Result;
7924 }
7925
7926 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
7927 // widened multiplication) aren't all ones or all zeroes.
7928
7929 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
7930 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
7931
7932 if (Scale == 0) {
7933 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
7934 DAG.getConstant(VTSize - 1, dl, ShiftTy));
7935 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
7936 // Saturated to SatMin if wide product is negative, and SatMax if wide
7937 // product is positive ...
7938 SDValue Zero = DAG.getConstant(0, dl, VT);
7939 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
7940 ISD::SETLT);
7941 // ... but only if we overflowed.
7942 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
7943 }
7944
7945 // We handled Scale==0 above so all the bits to examine is in Hi.
7946
7947 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
7948 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
7949 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
7950 dl, VT);
7951 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
7952 // Saturate to min if (Hi >> (Scale - 1)) < -1),
7953 // which is the same as if (HI < (-1 << (Scale - 1))
7954 SDValue HighMask =
7955 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
7956 dl, VT);
7957 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
7958 return Result;
7959}
7960
7961SDValue
7962TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
7963 SDValue LHS, SDValue RHS,
7964 unsigned Scale, SelectionDAG &DAG) const {
7965 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
7966 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
7967 "Expected a fixed point division opcode");
7968
7969 EVT VT = LHS.getValueType();
7970 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
7971 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
7972 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7973
7974 // If there is enough room in the type to upscale the LHS or downscale the
7975 // RHS before the division, we can perform it in this type without having to
7976 // resize. For signed operations, the LHS headroom is the number of
7977 // redundant sign bits, and for unsigned ones it is the number of zeroes.
7978 // The headroom for the RHS is the number of trailing zeroes.
7979 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
7980 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
7981 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
7982
7983 // For signed saturating operations, we need to be able to detect true integer
7984 // division overflow; that is, when you have MIN / -EPS. However, this
7985 // is undefined behavior and if we emit divisions that could take such
7986 // values it may cause undesired behavior (arithmetic exceptions on x86, for
7987 // example).
7988 // Avoid this by requiring an extra bit so that we never get this case.
7989 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
7990 // signed saturating division, we need to emit a whopping 32-bit division.
7991 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
7992 return SDValue();
7993
7994 unsigned LHSShift = std::min(LHSLead, Scale);
7995 unsigned RHSShift = Scale - LHSShift;
7996
7997 // At this point, we know that if we shift the LHS up by LHSShift and the
7998 // RHS down by RHSShift, we can emit a regular division with a final scaling
7999 // factor of Scale.
8000
8001 EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
8002 if (LHSShift)
8003 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
8004 DAG.getConstant(LHSShift, dl, ShiftTy));
8005 if (RHSShift)
8006 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
8007 DAG.getConstant(RHSShift, dl, ShiftTy));
8008
8009 SDValue Quot;
8010 if (Signed) {
8011 // For signed operations, if the resulting quotient is negative and the
8012 // remainder is nonzero, subtract 1 from the quotient to round towards
8013 // negative infinity.
8014 SDValue Rem;
8015 // FIXME: Ideally we would always produce an SDIVREM here, but if the
8016 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
8017 // we couldn't just form a libcall, but the type legalizer doesn't do it.
8018 if (isTypeLegal(VT) &&
8019 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
8020 Quot = DAG.getNode(ISD::SDIVREM, dl,
8021 DAG.getVTList(VT, VT),
8022 LHS, RHS);
8023 Rem = Quot.getValue(1);
8024 Quot = Quot.getValue(0);
8025 } else {
8026 Quot = DAG.getNode(ISD::SDIV, dl, VT,
8027 LHS, RHS);
8028 Rem = DAG.getNode(ISD::SREM, dl, VT,
8029 LHS, RHS);
8030 }
8031 SDValue Zero = DAG.getConstant(0, dl, VT);
8032 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
8033 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
8034 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
8035 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
8036 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
8037 DAG.getConstant(1, dl, VT));
8038 Quot = DAG.getSelect(dl, VT,
8039 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
8040 Sub1, Quot);
8041 } else
8042 Quot = DAG.getNode(ISD::UDIV, dl, VT,
8043 LHS, RHS);
8044
8045 return Quot;
8046}
8047
8048void TargetLowering::expandUADDSUBO(
8049 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
8050 SDLoc dl(Node);
8051 SDValue LHS = Node->getOperand(0);
8052 SDValue RHS = Node->getOperand(1);
8053 bool IsAdd = Node->getOpcode() == ISD::UADDO;
8054
8055 // If ADD/SUBCARRY is legal, use that instead.
8056 unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
8057 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
8058 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
8059 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
8060 { LHS, RHS, CarryIn });
8061 Result = SDValue(NodeCarry.getNode(), 0);
8062 Overflow = SDValue(NodeCarry.getNode(), 1);
8063 return;
8064 }
8065
8066 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
8067 LHS.getValueType(), LHS, RHS);
8068
8069 EVT ResultType = Node->getValueType(1);
8070 EVT SetCCType = getSetCCResultType(
8071 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
8072 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
8073 SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
8074 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
8075}
8076
8077void TargetLowering::expandSADDSUBO(
8078 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
8079 SDLoc dl(Node);
8080 SDValue LHS = Node->getOperand(0);
8081 SDValue RHS = Node->getOperand(1);
8082 bool IsAdd = Node->getOpcode() == ISD::SADDO;
8083
8084 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
8085 LHS.getValueType(), LHS, RHS);
8086
8087 EVT ResultType = Node->getValueType(1);
8088 EVT OType = getSetCCResultType(
8089 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
8090
8091 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
8092 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
8093 if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
8094 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
8095 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
8096 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
8097 return;
8098 }
8099
8100 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
8101
8102 // For an addition, the result should be less than one of the operands (LHS)
8103 // if and only if the other operand (RHS) is negative, otherwise there will
8104 // be overflow.
8105 // For a subtraction, the result should be less than one of the operands
8106 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
8107 // otherwise there will be overflow.
8108 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
8109 SDValue ConditionRHS =
8110 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
8111
8112 Overflow = DAG.getBoolExtOrTrunc(
8113 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
8114 ResultType, ResultType);
8115}
8116
8117bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
8118 SDValue &Overflow, SelectionDAG &DAG) const {
8119 SDLoc dl(Node);
8120 EVT VT = Node->getValueType(0);
8121 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8122 SDValue LHS = Node->getOperand(0);
8123 SDValue RHS = Node->getOperand(1);
8124 bool isSigned = Node->getOpcode() == ISD::SMULO;
8125
8126 // For power-of-two multiplications we can use a simpler shift expansion.
8127 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
8128 const APInt &C = RHSC->getAPIntValue();
8129 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
8130 if (C.isPowerOf2()) {
8131 // smulo(x, signed_min) is same as umulo(x, signed_min).
8132 bool UseArithShift = isSigned && !C.isMinSignedValue();
8133 EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
8134 SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
8135 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
8136 Overflow = DAG.getSetCC(dl, SetCCVT,
8137 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
8138 dl, VT, Result, ShiftAmt),
8139 LHS, ISD::SETNE);
8140 return true;
8141 }
8142 }
8143
8144 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
8145 if (VT.isVector())
8146 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
8147 VT.getVectorNumElements());
8148
8149 SDValue BottomHalf;
8150 SDValue TopHalf;
8151 static const unsigned Ops[2][3] =
8152 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
8153 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
8154 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
8155 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
8156 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
8157 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
8158 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
8159 RHS);
8160 TopHalf = BottomHalf.getValue(1);
8161 } else if (isTypeLegal(WideVT)) {
8162 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
8163 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
8164 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
8165 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
8166 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
8167 getShiftAmountTy(WideVT, DAG.getDataLayout()));
8168 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
8169 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
8170 } else {
8171 if (VT.isVector())
8172 return false;
8173
8174 // We can fall back to a libcall with an illegal type for the MUL if we
8175 // have a libcall big enough.
8176 // Also, we can fall back to a division in some cases, but that's a big
8177 // performance hit in the general case.
8178 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
8179 if (WideVT == MVT::i16)
8180 LC = RTLIB::MUL_I16;
8181 else if (WideVT == MVT::i32)
8182 LC = RTLIB::MUL_I32;
8183 else if (WideVT == MVT::i64)
8184 LC = RTLIB::MUL_I64;
8185 else if (WideVT == MVT::i128)
8186 LC = RTLIB::MUL_I128;
8187 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
8188
8189 SDValue HiLHS;
8190 SDValue HiRHS;
8191 if (isSigned) {
8192 // The high part is obtained by SRA'ing all but one of the bits of low
8193 // part.
8194 unsigned LoSize = VT.getFixedSizeInBits();
8195 HiLHS =
8196 DAG.getNode(ISD::SRA, dl, VT, LHS,
8197 DAG.getConstant(LoSize - 1, dl,
8198 getPointerTy(DAG.getDataLayout())));
8199 HiRHS =
8200 DAG.getNode(ISD::SRA, dl, VT, RHS,
8201 DAG.getConstant(LoSize - 1, dl,
8202 getPointerTy(DAG.getDataLayout())));
8203 } else {
8204 HiLHS = DAG.getConstant(0, dl, VT);
8205 HiRHS = DAG.getConstant(0, dl, VT);
8206 }
8207
8208 // Here we're passing the 2 arguments explicitly as 4 arguments that are
8209 // pre-lowered to the correct types. This all depends upon WideVT not
8210 // being a legal type for the architecture and thus has to be split to
8211 // two arguments.
8212 SDValue Ret;
8213 TargetLowering::MakeLibCallOptions CallOptions;
8214 CallOptions.setSExt(isSigned);
8215 CallOptions.setIsPostTypeLegalization(true);
8216 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
8217 // Halves of WideVT are packed into registers in different order
8218 // depending on platform endianness. This is usually handled by
8219 // the C calling convention, but we can't defer to it in
8220 // the legalizer.
8221 SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
8222 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
8223 } else {
8224 SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
8225 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
8226 }
8227 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
8228 "Ret value is a collection of constituent nodes holding result.");
8229 if (DAG.getDataLayout().isLittleEndian()) {
8230 // Same as above.
8231 BottomHalf = Ret.getOperand(0);
8232 TopHalf = Ret.getOperand(1);
8233 } else {
8234 BottomHalf = Ret.getOperand(1);
8235 TopHalf = Ret.getOperand(0);
8236 }
8237 }
8238
8239 Result = BottomHalf;
8240 if (isSigned) {
8241 SDValue ShiftAmt = DAG.getConstant(
8242 VT.getScalarSizeInBits() - 1, dl,
8243 getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
8244 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
8245 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
8246 } else {
8247 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
8248 DAG.getConstant(0, dl, VT), ISD::SETNE);
8249 }
8250
8251 // Truncate the result if SetCC returns a larger type than needed.
8252 EVT RType = Node->getValueType(1);
8253 if (RType.bitsLT(Overflow.getValueType()))
8254 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
8255
8256 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
8257 "Unexpected result type for S/UMULO legalization");
8258 return true;
8259}
8260
8261SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
8262 SDLoc dl(Node);
8263 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
8264 SDValue Op = Node->getOperand(0);
8265 EVT VT = Op.getValueType();
8266
8267 if (VT.isScalableVector())
8268 report_fatal_error(
8269 "Expanding reductions for scalable vectors is undefined.");
8270
8271 // Try to use a shuffle reduction for power of two vectors.
8272 if (VT.isPow2VectorType()) {
8273 while (VT.getVectorNumElements() > 1) {
8274 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
8275 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
8276 break;
8277
8278 SDValue Lo, Hi;
8279 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
8280 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
8281 VT = HalfVT;
8282 }
8283 }
8284
8285 EVT EltVT = VT.getVectorElementType();
8286 unsigned NumElts = VT.getVectorNumElements();
8287
8288 SmallVector<SDValue, 8> Ops;
8289 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
8290
8291 SDValue Res = Ops[0];
8292 for (unsigned i = 1; i < NumElts; i++)
8293 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
8294
8295 // Result type may be wider than element type.
8296 if (EltVT != Node->getValueType(0))
8297 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
8298 return Res;
8299}
8300
8301SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
8302 SDLoc dl(Node);
8303 SDValue AccOp = Node->getOperand(0);
8304 SDValue VecOp = Node->getOperand(1);
8305 SDNodeFlags Flags = Node->getFlags();
8306
8307 EVT VT = VecOp.getValueType();
8308 EVT EltVT = VT.getVectorElementType();
8309
8310 if (VT.isScalableVector())
8311 report_fatal_error(
8312 "Expanding reductions for scalable vectors is undefined.");
8313
8314 unsigned NumElts = VT.getVectorNumElements();
8315
8316 SmallVector<SDValue, 8> Ops;
8317 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
8318
8319 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
8320
8321 SDValue Res = AccOp;
8322 for (unsigned i = 0; i < NumElts; i++)
8323 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
8324
8325 return Res;
8326}
8327
8328bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
8329 SelectionDAG &DAG) const {
8330 EVT VT = Node->getValueType(0);
8331 SDLoc dl(Node);
8332 bool isSigned = Node->getOpcode() == ISD::SREM;
8333 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
8334 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
8335 SDValue Dividend = Node->getOperand(0);
8336 SDValue Divisor = Node->getOperand(1);
8337 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
8338 SDVTList VTs = DAG.getVTList(VT, VT);
8339 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
8340 return true;
8341 } else if (isOperationLegalOrCustom(DivOpc, VT)) {
8342 // X % Y -> X-X/Y*Y
8343 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
8344 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
8345 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
8346 return true;
8347 }
8348 return false;
8349}
8350
8351SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
8352 SelectionDAG &DAG) const {
8353 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
8354 SDLoc dl(SDValue(Node, 0));
8355 SDValue Src = Node->getOperand(0);
8356
8357 // DstVT is the result type, while SatVT is the size to which we saturate
8358 EVT SrcVT = Src.getValueType();
8359 EVT DstVT = Node->getValueType(0);
8360
8361 unsigned SatWidth = Node->getConstantOperandVal(1);
8362 unsigned DstWidth = DstVT.getScalarSizeInBits();
8363 assert(SatWidth <= DstWidth &&
8364 "Expected saturation width smaller than result width");
8365
8366 // Determine minimum and maximum integer values and their corresponding
8367 // floating-point values.
8368 APInt MinInt, MaxInt;
8369 if (IsSigned) {
8370 MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
8371 MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
8372 } else {
8373 MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
8374 MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
8375 }
8376
8377 // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
8378 // libcall emission cannot handle this. Large result types will fail.
8379 if (SrcVT == MVT::f16) {
8380 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
8381 SrcVT = Src.getValueType();
8382 }
8383
8384 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
8385 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
8386
8387 APFloat::opStatus MinStatus =
8388 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8389 APFloat::opStatus MaxStatus =
8390 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8391 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8392 !(MaxStatus & APFloat::opStatus::opInexact);
8393
8394 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
8395 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
8396
8397 // If the integer bounds are exactly representable as floats and min/max are
8398 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
8399 // of comparisons and selects.
8400 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
8401 isOperationLegal(ISD::FMAXNUM, SrcVT);
8402 if (AreExactFloatBounds && MinMaxLegal) {
8403 SDValue Clamped = Src;
8404
8405 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8406 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
8407 // Clamp by MaxFloat from above. NaN cannot occur.
8408 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
8409 // Convert clamped value to integer.
8410 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
8411 dl, DstVT, Clamped);
8412
8413 // In the unsigned case we're done, because we mapped NaN to MinFloat,
8414 // which will cast to zero.
8415 if (!IsSigned)
8416 return FpToInt;
8417
8418 // Otherwise, select 0 if Src is NaN.
8419 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
8420 return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
8421 ISD::CondCode::SETUO);
8422 }
8423
8424 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
8425 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
8426
8427 // Result of direct conversion. The assumption here is that the operation is
8428 // non-trapping and it's fine to apply it to an out-of-range value if we
8429 // select it away later.
8430 SDValue FpToInt =
8431 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
8432
8433 SDValue Select = FpToInt;
8434
8435 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8436 // MinInt if Src is NaN.
8437 Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
8438 ISD::CondCode::SETULT);
8439 // If Src OGT MaxFloat, select MaxInt.
8440 Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
8441 ISD::CondCode::SETOGT);
8442
8443 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8444 // is already zero.
8445 if (!IsSigned)
8446 return Select;
8447
8448 // Otherwise, select 0 if Src is NaN.
8449 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
8450 return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
8451}
8452